Diffstat (limited to 'arch/x86/include')
439 files changed, 29410 insertions, 23460 deletions
diff --git a/arch/x86/include/asm/GEN-for-each-reg.h b/arch/x86/include/asm/GEN-for-each-reg.h new file mode 100644 index 000000000000..07949102a08d --- /dev/null +++ b/arch/x86/include/asm/GEN-for-each-reg.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * These are in machine order; things rely on that. + */ +#ifdef CONFIG_64BIT +GEN(rax) +GEN(rcx) +GEN(rdx) +GEN(rbx) +GEN(rsp) +GEN(rbp) +GEN(rsi) +GEN(rdi) +GEN(r8) +GEN(r9) +GEN(r10) +GEN(r11) +GEN(r12) +GEN(r13) +GEN(r14) +GEN(r15) +#else +GEN(eax) +GEN(ecx) +GEN(edx) +GEN(ebx) +GEN(esp) +GEN(ebp) +GEN(esi) +GEN(edi) +#endif diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index a0ab9ab61c75..4566000e15c4 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -1,13 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +generated-y += orc_hash.h generated-y += syscalls_32.h generated-y += syscalls_64.h +generated-y += syscalls_x32.h generated-y += unistd_32_ia32.h generated-y += unistd_64_x32.h generated-y += xen-hypercalls.h +generated-y += cpufeaturemasks.h -generic-y += dma-contiguous.h generic-y += early_ioremap.h -generic-y += export.h +generic-y += fprobe.h generic-y += mcs_spinlock.h -generic-y += mm-arch-hooks.h +generic-y += mmzone.h diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h deleted file mode 100644 index 7d3ece8bfb61..000000000000 --- a/arch/x86/include/asm/a.out-core.h +++ /dev/null @@ -1,67 +0,0 @@ -/* a.out coredump register dumper - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#ifndef _ASM_X86_A_OUT_CORE_H -#define _ASM_X86_A_OUT_CORE_H - -#ifdef __KERNEL__ -#ifdef CONFIG_X86_32 - -#include <linux/user.h> -#include <linux/elfcore.h> -#include <linux/mm_types.h> - -#include <asm/debugreg.h> - -/* - * fill in the user structure for an a.out core dump - */ -static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) -{ -/* changed the size calculations - should hopefully work better. 
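GEN-for-each-reg.h is an x-macro header: a consumer defines GEN(reg), includes the header to stamp out one expansion per general-purpose register (in machine order, as the comment warns), then undefines it. A minimal sketch of the consumption pattern; the declaration inside GEN() here is invented, though the kernel's retpoline thunk declarations follow the same shape:

    /* Stamp out one extern declaration per GP register.
     * The __demo_thunk_* names are hypothetical. */
    #define GEN(reg) extern void __demo_thunk_ ## reg(void);
    #include <asm/GEN-for-each-reg.h>
    #undef GEN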
lbt */ - dump->magic = CMAGIC; - dump->start_code = 0; - dump->start_stack = regs->sp & ~(PAGE_SIZE - 1); - dump->u_tsize = ((unsigned long)current->mm->end_code) >> PAGE_SHIFT; - dump->u_dsize = ((unsigned long)(current->mm->brk + (PAGE_SIZE - 1))) - >> PAGE_SHIFT; - dump->u_dsize -= dump->u_tsize; - dump->u_ssize = 0; - aout_dump_debugregs(dump); - - if (dump->start_stack < TASK_SIZE) - dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) - >> PAGE_SHIFT; - - dump->regs.bx = regs->bx; - dump->regs.cx = regs->cx; - dump->regs.dx = regs->dx; - dump->regs.si = regs->si; - dump->regs.di = regs->di; - dump->regs.bp = regs->bp; - dump->regs.ax = regs->ax; - dump->regs.ds = (u16)regs->ds; - dump->regs.es = (u16)regs->es; - dump->regs.fs = (u16)regs->fs; - dump->regs.gs = get_user_gs(regs); - dump->regs.orig_ax = regs->orig_ax; - dump->regs.ip = regs->ip; - dump->regs.cs = (u16)regs->cs; - dump->regs.flags = regs->flags; - dump->regs.sp = regs->sp; - dump->regs.ss = (u16)regs->ss; - - dump->u_fpvalid = dump_fpu(regs, &dump->i387); -} - -#endif /* CONFIG_X86_32 */ -#endif /* __KERNEL__ */ -#endif /* _ASM_X86_A_OUT_CORE_H */ diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h index 1b010a859b8b..d937c55e717e 100644 --- a/arch/x86/include/asm/acenv.h +++ b/arch/x86/include/asm/acenv.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * X86 specific ACPICA environments and implementation * * Copyright (C) 2014, Intel Corporation * Author: Lv Zheng <lv.zheng@intel.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _ASM_X86_ACENV_H @@ -16,7 +13,19 @@ /* Asm macros */ -#define ACPI_FLUSH_CPU_CACHE() wbinvd() +/* + * ACPI_FLUSH_CPU_CACHE() flushes caches on entering sleep states. + * It is required to prevent data loss. + * + * While running inside virtual machine, the kernel can bypass cache flushing. + * Changing sleep state in a virtual machine doesn't affect the host system + * sleep state and cannot lead to data loss. + */ +#define ACPI_FLUSH_CPU_CACHE() \ +do { \ + if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) \ + wbinvd(); \ +} while (0) int __acpi_acquire_global_lock(unsigned int *lock); int __acpi_release_global_lock(unsigned int *lock); diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 2f01eb4d6208..a03aa6f999d1 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -1,37 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_ACPI_H #define _ASM_X86_ACPI_H /* * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
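The rewritten ACPI_FLUSH_CPU_CACHE() above keys off X86_FEATURE_HYPERVISOR, which is simply CPUID leaf 1, ECX bit 31. A standalone sketch of that check, using the compiler-provided CPUID helper rather than the kernel's cpu_feature_enabled() machinery:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                    return 1;

            /* ECX bit 31 is set when running under a hypervisor,
             * in which case the WBINVD on sleep entry is skipped. */
            printf("hypervisor: %s\n", (ecx & (1u << 31)) ? "yes" : "no");
            return 0;
    }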
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -#include <acpi/pdc_intel.h> +#include <acpi/proc_cap_intel.h> #include <asm/numa.h> #include <asm/fixmap.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/mpspec.h> -#include <asm/realmode.h> #include <asm/x86_init.h> +#include <asm/cpufeature.h> +#include <asm/irq_vectors.h> +#include <asm/xen/hypervisor.h> + +#include <xen/xen.h> #ifdef CONFIG_ACPI_APEI # include <asm/pgtable_types.h> @@ -48,6 +35,7 @@ extern int acpi_skip_timer_override; extern int acpi_use_timer_override; extern int acpi_fix_pin2_polarity; extern int acpi_disable_cmcff; +extern bool acpi_int_src_ovr[NR_IRQS_LEGACY]; extern u8 acpi_sci_flags; extern u32 acpi_sci_override_gsi; @@ -68,6 +56,8 @@ static inline void disable_acpi(void) extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); +extern int acpi_blacklisted(void); + static inline void acpi_noirq_set(void) { acpi_noirq = 1; } static inline void acpi_disable_pci(void) { @@ -79,7 +69,21 @@ static inline void acpi_disable_pci(void) extern int (*acpi_suspend_lowlevel)(void); /* Physical address to resume after wakeup */ -#define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start)) +unsigned long acpi_get_wakeup_address(void); + +static inline bool acpi_skip_set_wakeup_address(void) +{ + return cpu_feature_enabled(X86_FEATURE_XENPV); +} + +#define acpi_skip_set_wakeup_address acpi_skip_set_wakeup_address + +union acpi_subtable_headers; + +int __init acpi_parse_mp_wake(union acpi_subtable_headers *header, + const unsigned long end); + +void asm_acpi_mp_play_dead(u64 reset_vector, u64 pgd_pa); /* * Check if the CPU can handle C2 and deeper @@ -110,23 +114,42 @@ static inline bool arch_has_acpi_pdc(void) c->x86_vendor == X86_VENDOR_CENTAUR); } -static inline void arch_acpi_set_pdc_bits(u32 *buf) +static inline void arch_acpi_set_proc_cap_bits(u32 *cap) { struct cpuinfo_x86 *c = &cpu_data(0); - buf[2] |= ACPI_PDC_C_CAPABILITY_SMP; + *cap |= ACPI_PROC_CAP_C_CAPABILITY_SMP; + + /* Enable coordination with firmware's _TSD info */ + *cap |= ACPI_PROC_CAP_SMP_T_SWCOORD; if (cpu_has(c, X86_FEATURE_EST)) - buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; + *cap |= ACPI_PROC_CAP_EST_CAPABILITY_SWSMP; if (cpu_has(c, X86_FEATURE_ACPI)) - buf[2] |= ACPI_PDC_T_FFH; + *cap |= ACPI_PROC_CAP_T_FFH; + + if (cpu_has(c, X86_FEATURE_HWP)) + *cap |= ACPI_PROC_CAP_COLLAB_PROC_PERF; /* - * If mwait/monitor is unsupported, C2/C3_FFH will be disabled + * If mwait/monitor is unsupported, C_C1_FFH and + * C2/C3_FFH will be disabled. */ - if (!cpu_has(c, X86_FEATURE_MWAIT)) - buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); + if (!cpu_has(c, X86_FEATURE_MWAIT) || + boot_option_idle_override == IDLE_NOMWAIT) + *cap &= ~(ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH); + + if (xen_initial_domain()) { + /* + * When Linux is running as Xen dom0, the hypervisor is the + * entity in charge of the processor power management, and so + * Xen needs to check the OS capabilities reported in the + * processor capabilities buffer matches what the hypervisor + * driver supports. 
+ */ + xen_sanitize_proc_cap_bits(cap); + } } static inline bool acpi_has_cpu_in_madt(void) @@ -134,16 +157,31 @@ static inline bool acpi_has_cpu_in_madt(void) return !!acpi_lapic; } +#define ACPI_HAVE_ARCH_SET_ROOT_POINTER +static __always_inline void acpi_arch_set_root_pointer(u64 addr) +{ + x86_init.acpi.set_root_pointer(addr); +} + #define ACPI_HAVE_ARCH_GET_ROOT_POINTER -static inline u64 acpi_arch_get_root_pointer(void) +static __always_inline u64 acpi_arch_get_root_pointer(void) { return x86_init.acpi.get_root_pointer(); } void acpi_generic_reduced_hw_init(void); +void x86_default_set_root_pointer(u64 addr); u64 x86_default_get_root_pointer(void); +#ifdef CONFIG_XEN_PV +/* A Xen PV domain needs a special acpi_os_ioremap() handling. */ +extern void __iomem * (*acpi_os_ioremap)(acpi_physical_address phys, + acpi_size size); +void __iomem *x86_acpi_os_ioremap(acpi_physical_address phys, acpi_size size); +#define acpi_os_ioremap acpi_os_ioremap +#endif + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 @@ -155,6 +193,8 @@ static inline void disable_acpi(void) { } static inline void acpi_generic_reduced_hw_init(void) { } +static inline void x86_default_set_root_pointer(u64 addr) { } + static inline u64 x86_default_get_root_pointer(void) { return 0; @@ -168,7 +208,7 @@ static inline u64 x86_default_get_root_pointer(void) extern int x86_acpi_numa_init(void); #endif /* CONFIG_ACPI_NUMA */ -#define acpi_unlazy_tlb(x) leave_mm(x) +struct cper_ia_proc_ctx; #ifdef CONFIG_ACPI_APEI static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) @@ -188,6 +228,15 @@ static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) */ return PAGE_KERNEL_NOENC; } + +int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id); +#else +static inline int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id) +{ + return -EINVAL; +} #endif #define ACPI_TABLE_UPGRADE_MAX_PHYS (max_low_pfn_mapped << PAGE_SHIFT) diff --git a/arch/x86/include/asm/acrn.h b/arch/x86/include/asm/acrn.h new file mode 100644 index 000000000000..fab11192c60a --- /dev/null +++ b/arch/x86/include/asm/acrn.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_ACRN_H +#define _ASM_X86_ACRN_H + +/* + * This CPUID returns feature bitmaps in EAX. + * Guest VM uses this to detect the appropriate feature bit. + */ +#define ACRN_CPUID_FEATURES 0x40000001 +/* Bit 0 indicates whether guest VM is privileged */ +#define ACRN_FEATURE_PRIVILEGED_VM BIT(0) + +/* + * Timing Information. + * This leaf returns the current TSC frequency in kHz. + * + * EAX: (Virtual) TSC frequency in kHz. + * EBX, ECX, EDX: RESERVED (reserved fields are set to zero). + */ +#define ACRN_CPUID_TIMING_INFO 0x40000010 + +void acrn_setup_intr_handler(void (*handler)(void)); +void acrn_remove_intr_handler(void); + +static inline u32 acrn_cpuid_base(void) +{ + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return cpuid_base_hypervisor("ACRNACRNACRN", 0); + + return 0; +} + +static inline unsigned long acrn_get_tsc_khz(void) +{ + return cpuid_eax(ACRN_CPUID_TIMING_INFO); +} + +/* + * Hypercalls for ACRN + * + * - VMCALL instruction is used to implement ACRN hypercalls. + * - ACRN hypercall ABI: + * - Hypercall number is passed in R8 register. + * - Up to 2 arguments are passed in RDI, RSI. + * - Return value will be placed in RAX. + * + * Because GCC doesn't support R8 register as direct register constraints, use + * supported constraint as input with a explicit MOV to R8 in beginning of asm. 
+ */ +static inline long acrn_hypercall0(unsigned long hcall_id) +{ + long result; + + asm volatile("movl %1, %%r8d\n\t" + "vmcall\n\t" + : "=a" (result) + : "g" (hcall_id) + : "r8", "memory"); + + return result; +} + +static inline long acrn_hypercall1(unsigned long hcall_id, + unsigned long param1) +{ + long result; + + asm volatile("movl %1, %%r8d\n\t" + "vmcall\n\t" + : "=a" (result) + : "g" (hcall_id), "D" (param1) + : "r8", "memory"); + + return result; +} + +static inline long acrn_hypercall2(unsigned long hcall_id, + unsigned long param1, + unsigned long param2) +{ + long result; + + asm volatile("movl %1, %%r8d\n\t" + "vmcall\n\t" + : "=a" (result) + : "g" (hcall_id), "D" (param1), "S" (param2) + : "r8", "memory"); + + return result; +} + +#endif /* _ASM_X86_ACRN_H */ diff --git a/arch/x86/include/asm/agp.h b/arch/x86/include/asm/agp.h index 8e25bf4f323a..c8c111d8fbd7 100644 --- a/arch/x86/include/asm/agp.h +++ b/arch/x86/include/asm/agp.h @@ -2,14 +2,14 @@ #ifndef _ASM_X86_AGP_H #define _ASM_X86_AGP_H -#include <asm/pgtable.h> +#include <linux/pgtable.h> #include <asm/cacheflush.h> /* * Functions to keep the agpgart mappings coherent with the MMU. The * GART gives the CPU a physical alias of pages in memory. The alias * region is mapped uncacheable. Make sure there are no conflicting - * mappings with different cachability attributes for the same + * mappings with different cacheability attributes for the same * page. This avoids data corruption on some CPUs. */ @@ -23,10 +23,4 @@ */ #define flush_agp_cache() wbinvd() -/* GATT allocation. Returns/accepts GATT kernel virtual address. */ -#define alloc_gatt_pages(order) \ - ((char *)__get_free_pages(GFP_KERNEL, (order))) -#define free_gatt_pages(table, order) \ - free_pages((unsigned long)(table), (order)) - #endif /* _ASM_X86_AGP_H */ diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h deleted file mode 100644 index 31b627b43a8e..000000000000 --- a/arch/x86/include/asm/alternative-asm.h +++ /dev/null @@ -1,103 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_ALTERNATIVE_ASM_H -#define _ASM_X86_ALTERNATIVE_ASM_H - -#ifdef __ASSEMBLY__ - -#include <asm/asm.h> - -#ifdef CONFIG_SMP - .macro LOCK_PREFIX -672: lock - .pushsection .smp_locks,"a" - .balign 4 - .long 672b - . - .popsection - .endm -#else - .macro LOCK_PREFIX - .endm -#endif - -/* - * Issue one struct alt_instr descriptor entry (need to put it into - * the section .altinstructions, see below). This entry contains - * enough information for the alternatives patching code to patch an - * instruction. See apply_alternatives(). - */ -.macro altinstruction_entry orig alt feature orig_len alt_len pad_len - .long \orig - . - .long \alt - . - .word \feature - .byte \orig_len - .byte \alt_len - .byte \pad_len -.endm - -/* - * Define an alternative between two instructions. If @feature is - * present, early code in apply_alternatives() replaces @oldinstr with - * @newinstr. ".skip" directive takes care of proper instruction padding - * in case @newinstr is longer than @oldinstr. 
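The .skip expression in the ALTERNATIVE macro encodes max(0, replacement_len - original_len): gas evaluates a true comparison to -1, so -(x > 0) becomes 1 exactly when the replacement is longer, and that many 0x90 (NOP) bytes are emitted. The alt_max_short() definition just below is the branchless max from the cited Stanford bithacks page. Both restated in plain C as a sketch (C comparisons already yield +1, so the extra negation drops out):

    /* C analogue of: .skip -(((new)-(old)) > 0) * ((new)-(old)), 0x90 */
    static int alt_pad_len(int old_len, int new_len)
    {
            int diff = new_len - old_len;

            return (diff > 0) * diff;       /* max(0, diff) NOP bytes */
    }

    /* C analogue of alt_max_short(a, b): branchless max */
    static int alt_max(int a, int b)
    {
            return a ^ ((a ^ b) & -(a < b));
    }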
- */ -.macro ALTERNATIVE oldinstr, newinstr, feature -140: - \oldinstr -141: - .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 -142: - - .pushsection .altinstructions,"a" - altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b - .popsection - - .pushsection .altinstr_replacement,"ax" -143: - \newinstr -144: - .popsection -.endm - -#define old_len 141b-140b -#define new_len1 144f-143f -#define new_len2 145f-144f - -/* - * gas compatible max based on the idea from: - * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax - * - * The additional "-" is needed because gas uses a "true" value of -1. - */ -#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) - - -/* - * Same as ALTERNATIVE macro above but for two alternatives. If CPU - * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has - * @feature2, it replaces @oldinstr with @feature2. - */ -.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 -140: - \oldinstr -141: - .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ - (alt_max_short(new_len1, new_len2) - (old_len)),0x90 -142: - - .pushsection .altinstructions,"a" - altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b - altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b - .popsection - - .pushsection .altinstr_replacement,"ax" -143: - \newinstr1 -144: - \newinstr2 -145: - .popsection -.endm - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_X86_ALTERNATIVE_ASM_H */ diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 0660e14690c8..03364510d5fe 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -2,12 +2,23 @@ #ifndef _ASM_X86_ALTERNATIVE_H #define _ASM_X86_ALTERNATIVE_H -#ifndef __ASSEMBLY__ - #include <linux/types.h> -#include <linux/stddef.h> #include <linux/stringify.h> +#include <linux/objtool.h> #include <asm/asm.h> +#include <asm/bug.h> + +#define ALT_FLAGS_SHIFT 16 + +#define ALT_FLAG_NOT (1 << 0) +#define ALT_NOT(feature) ((ALT_FLAG_NOT << ALT_FLAGS_SHIFT) | (feature)) +#define ALT_FLAG_DIRECT_CALL (1 << 1) +#define ALT_DIRECT_CALL(feature) ((ALT_FLAG_DIRECT_CALL << ALT_FLAGS_SHIFT) | (feature)) +#define ALT_CALL_ALWAYS ALT_DIRECT_CALL(X86_FEATURE_ALWAYS) + +#ifndef __ASSEMBLER__ + +#include <linux/stddef.h> /* * Alternative inline assembly for SMP. @@ -38,22 +49,46 @@ ".popsection\n" \ "671:" -#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; " +#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock " #else /* ! CONFIG_SMP */ #define LOCK_PREFIX_HERE "" #define LOCK_PREFIX "" #endif +/* + * The patching flags are part of the upper bits of the @ft_flags parameter when + * specifying them. The split is currently like this: + * + * [31... flags ...16][15... CPUID feature bit ...0] + * + * but since this is all hidden in the macros argument being split, those fields can be + * extended in the future to fit in a u64 or however the need arises. 
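Given that [31..16 | 15..0] split of @ft_flags, composing and decomposing entries is plain shift-and-mask work. A userspace sketch; the feature number is a made-up stand-in for a real X86_FEATURE_* value:

    #include <stdint.h>
    #include <stdio.h>

    #define ALT_FLAGS_SHIFT 16
    #define ALT_FLAG_NOT    (1 << 0)
    #define ALT_NOT(feature) ((ALT_FLAG_NOT << ALT_FLAGS_SHIFT) | (feature))

    int main(void)
    {
            uint32_t ft_flags = ALT_NOT(0x123);     /* hypothetical feature */

            printf("feature=%#x flags=%#x\n",
                   ft_flags & 0xffff,               /* CPUID feature bits */
                   ft_flags >> ALT_FLAGS_SHIFT);    /* patching flags */
            return 0;
    }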
+ */ struct alt_instr { s32 instr_offset; /* original instruction */ s32 repl_offset; /* offset to replacement instruction */ - u16 cpuid; /* cpuid bit set for replacement */ + + union { + struct { + u32 cpuid: 16; /* CPUID bit set for replacement */ + u32 flags: 16; /* patching control flags */ + }; + u32 ft_flags; + }; + u8 instrlen; /* length of original instruction */ u8 replacementlen; /* length of new instruction */ - u8 padlen; /* length of build-time padding */ } __packed; +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; + +extern s32 __retpoline_sites[], __retpoline_sites_end[]; +extern s32 __return_sites[], __return_sites_end[]; +extern s32 __cfi_sites[], __cfi_sites_end[]; +extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[]; +extern s32 __smp_locks[], __smp_locks_end[]; + /* * Debug flag that can be tested to see whether alternative * instructions were patched in already: @@ -62,9 +97,71 @@ extern int alternatives_patched; extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); +extern void apply_retpolines(s32 *start, s32 *end); +extern void apply_returns(s32 *start, s32 *end); +extern void apply_seal_endbr(s32 *start, s32 *end); +extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine, + s32 *start_cfi, s32 *end_cfi); struct module; +struct callthunk_sites { + s32 *call_start, *call_end; +}; + +#ifdef CONFIG_CALL_THUNKS +extern void callthunks_patch_builtin_calls(void); +extern void callthunks_patch_module_calls(struct callthunk_sites *sites, + struct module *mod); +extern void *callthunks_translate_call_dest(void *dest); +extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip); +#else +static __always_inline void callthunks_patch_builtin_calls(void) {} +static __always_inline void +callthunks_patch_module_calls(struct callthunk_sites *sites, + struct module *mod) {} +static __always_inline void *callthunks_translate_call_dest(void *dest) +{ + return dest; +} +static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, + void *func, void *ip) +{ + return 0; +} +#endif + +#ifdef CONFIG_MITIGATION_ITS +extern void its_init_mod(struct module *mod); +extern void its_fini_mod(struct module *mod); +extern void its_free_mod(struct module *mod); +extern u8 *its_static_thunk(int reg); +#else /* CONFIG_MITIGATION_ITS */ +static inline void its_init_mod(struct module *mod) { } +static inline void its_fini_mod(struct module *mod) { } +static inline void its_free_mod(struct module *mod) { } +static inline u8 *its_static_thunk(int reg) +{ + WARN_ONCE(1, "ITS not compiled in"); + + return NULL; +} +#endif + +#if defined(CONFIG_MITIGATION_RETHUNK) && defined(CONFIG_OBJTOOL) +extern bool cpu_wants_rethunk(void); +extern bool cpu_wants_rethunk_at(void *addr); +#else +static __always_inline bool cpu_wants_rethunk(void) +{ + return false; +} +static __always_inline bool cpu_wants_rethunk_at(void *addr) +{ + return false; +} +#endif + #ifdef CONFIG_SMP extern void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, @@ -85,74 +182,55 @@ static inline int alternatives_text_reserved(void *start, void *end) } #endif /* CONFIG_SMP */ -#define b_replacement(num) "664"#num -#define e_replacement(num) "665"#num - -#define alt_end_marker "663" -#define alt_slen "662b-661b" -#define alt_pad_len alt_end_marker"b-662b" -#define alt_total_slen alt_end_marker"b-661b" -#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" - 
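A note on struct alt_instr above: both code references are stored as self-relative s32 offsets (target minus the address of the field itself, as the ".long 771b - ." emission shows), which keeps the table compact and relocation-free. Recovering the pointers works in the style of apply_alternatives(); a sketch with a local struct mirroring the two offset fields:

    #include <stdint.h>

    struct alt_entry { int32_t instr_offset; int32_t repl_offset; };

    static uint8_t *alt_instr(const struct alt_entry *a)
    {
            /* target = &field + field */
            return (uint8_t *)&a->instr_offset + a->instr_offset;
    }

    static uint8_t *alt_replacement(const struct alt_entry *a)
    {
            return (uint8_t *)&a->repl_offset + a->repl_offset;
    }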
-#define __OLDINSTR(oldinstr, num) \ - "661:\n\t" oldinstr "\n662:\n" \ - ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \ - "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" - -#define OLDINSTR(oldinstr, num) \ - __OLDINSTR(oldinstr, num) \ - alt_end_marker ":\n" - -/* - * gas compatible max based on the idea from: - * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax - * - * The additional "-" is needed because gas uses a "true" value of -1. - */ -#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))" - -/* - * Pad the second replacement alternative with additional NOPs if it is - * additionally longer than the first replacement alternative. - */ -#define OLDINSTR_2(oldinstr, num1, num2) \ - "661:\n\t" oldinstr "\n662:\n" \ - ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \ - "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \ - alt_end_marker ":\n" - -#define ALTINSTR_ENTRY(feature, num) \ - " .long 661b - .\n" /* label */ \ - " .long " b_replacement(num)"f - .\n" /* new instruction */ \ - " .word " __stringify(feature) "\n" /* feature bit */ \ +#define ALT_CALL_INSTR "call BUG_func" + +#define alt_slen "772b-771b" +#define alt_total_slen "773b-771b" +#define alt_rlen "775f-774f" + +#define OLDINSTR(oldinstr) \ + "# ALT: oldinstr\n" \ + "771:\n\t" oldinstr "\n772:\n" \ + "# ALT: padding\n" \ + ".skip -(((" alt_rlen ")-(" alt_slen ")) > 0) * " \ + "((" alt_rlen ")-(" alt_slen ")),0x90\n" \ + "773:\n" + +#define ALTINSTR_ENTRY(ft_flags) \ + ".pushsection .altinstructions, \"aM\", @progbits, " \ + __stringify(ALT_INSTR_SIZE) "\n" \ + " .long 771b - .\n" /* label */ \ + " .long 774f - .\n" /* new instruction */ \ + " .4byte " __stringify(ft_flags) "\n" /* feature + flags */ \ " .byte " alt_total_slen "\n" /* source len */ \ - " .byte " alt_rlen(num) "\n" /* replacement len */ \ - " .byte " alt_pad_len "\n" /* pad len */ - -#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \ - b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t" - -/* alternative assembly primitive: */ -#define ALTERNATIVE(oldinstr, newinstr, feature) \ - OLDINSTR(oldinstr, 1) \ - ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature, 1) \ - ".popsection\n" \ - ".pushsection .altinstr_replacement, \"ax\"\n" \ - ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ + " .byte " alt_rlen "\n" /* replacement len */ \ ".popsection\n" -#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ - OLDINSTR_2(oldinstr, 1, 2) \ - ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature1, 1) \ - ALTINSTR_ENTRY(feature2, 2) \ - ".popsection\n" \ +#define ALTINSTR_REPLACEMENT(newinstr) /* replacement */ \ ".pushsection .altinstr_replacement, \"ax\"\n" \ - ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ - ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ + ANNOTATE_DATA_SPECIAL "\n" \ + "# ALT: replacement\n" \ + "774:\n\t" newinstr "\n775:\n" \ ".popsection\n" +/* alternative assembly primitive: */ +#define ALTERNATIVE(oldinstr, newinstr, ft_flags) \ + OLDINSTR(oldinstr) \ + ALTINSTR_ENTRY(ft_flags) \ + ALTINSTR_REPLACEMENT(newinstr) + +#define ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \ + ALTERNATIVE(ALTERNATIVE(oldinstr, newinstr1, ft_flags1), newinstr2, ft_flags2) + +/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. 
*/ +#define ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) \ + ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, newinstr_yes, ft_flags) + +#define ALTERNATIVE_3(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, \ + newinstr3, ft_flags3) \ + ALTERNATIVE(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2), \ + newinstr3, ft_flags3) + /* * Alternative instructions for different CPU types or capabilities. * @@ -165,11 +243,11 @@ static inline int alternatives_text_reserved(void *start, void *end) * For non barrier like inlines please define new variants * without volatile and memory clobber. */ -#define alternative(oldinstr, newinstr, feature) \ - asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") +#define alternative(oldinstr, newinstr, ft_flags) \ + asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) : : : "memory") -#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ - asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory") +#define alternative_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \ + asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) ::: "memory") /* * Alternative inline assembly with input. @@ -177,38 +255,33 @@ static inline int alternatives_text_reserved(void *start, void *end) * Peculiarities: * No memory clobber here. * Argument numbers start with 1. - * Best is to use constraints that are fixed size (like (%1) ... "r") - * If you use variable sized constraints like "m" or "g" in the - * replacement make sure to pad to the worst case length. * Leaving an unused argument 0 to keep API compatibility. */ -#define alternative_input(oldinstr, newinstr, feature, input...) \ - asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ - : : "i" (0), ## input) - -/* - * This is similar to alternative_input. But it has two features and - * respective instructions. - * - * If CPU has feature2, newinstr2 is used. - * Otherwise, if CPU has feature1, newinstr1 is used. - * Otherwise, oldinstr is used. - */ -#define alternative_input_2(oldinstr, newinstr1, feature1, newinstr2, \ - feature2, input...) \ - asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \ - newinstr2, feature2) \ +#define alternative_input(oldinstr, newinstr, ft_flags, input...) \ + asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) \ : : "i" (0), ## input) /* Like alternative_input, but with a single output argument */ -#define alternative_io(oldinstr, newinstr, feature, output, input...) \ - asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ +#define alternative_io(oldinstr, newinstr, ft_flags, output, input...) \ + asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) \ : output : "i" (0), ## input) -/* Like alternative_io, but for replacing a direct call with another one. */ -#define alternative_call(oldfunc, newfunc, feature, output, input...) \ - asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \ - : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) +/* + * Like alternative_io, but for replacing a direct call with another one. + * + * Use the %c operand modifier which is the generic way to print a bare + * constant expression with all syntax-specific punctuation omitted. 
%P + * is the x86-specific variant which can handle constants too, for + * historical reasons, but it should be used primarily for PIC + * references: i.e., if used for a function, it would add the PLT + * suffix. + */ +#define alternative_call(oldfunc, newfunc, ft_flags, output, input, clobbers...) \ + asm_inline volatile(ALTERNATIVE("call %c[old]", "call %c[new]", ft_flags) \ + : ALT_OUTPUT_SP(output) \ + : [old] "i" (oldfunc), [new] "i" (newfunc) \ + COMMA(input) \ + : clobbers) /* * Like alternative_call, but there are two features and respective functions. @@ -216,26 +289,113 @@ static inline int alternatives_text_reserved(void *start, void *end) * Otherwise, if CPU has feature1, function1 is used. * Otherwise, old function is used. */ -#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ - output, input...) \ - asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ - "call %P[new2]", feature2) \ - : output, ASM_CALL_CONSTRAINT \ - : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ - [new2] "i" (newfunc2), ## input) +#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \ + output, input, clobbers...) \ + asm_inline volatile(ALTERNATIVE_2("call %c[old]", "call %c[new1]", ft_flags1, \ + "call %c[new2]", ft_flags2) \ + : ALT_OUTPUT_SP(output) \ + : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ + [new2] "i" (newfunc2) \ + COMMA(input) \ + : clobbers) + +#define ALT_OUTPUT_SP(...) ASM_CALL_CONSTRAINT, ## __VA_ARGS__ + +/* Macro for creating assembler functions avoiding any C magic. */ +#define DEFINE_ASM_FUNC(func, instr, sec) \ + asm (".pushsection " #sec ", \"ax\"\n" \ + ".global " #func "\n\t" \ + ".type " #func ", @function\n\t" \ + ASM_FUNC_ALIGN "\n" \ + #func ":\n\t" \ + ASM_ENDBR \ + instr "\n\t" \ + ASM_RET \ + ".size " #func ", . - " #func "\n\t" \ + ".popsection") + +void BUG_func(void); +void nop_func(void); + +#else /* __ASSEMBLER__ */ + +#ifdef CONFIG_SMP + .macro LOCK_PREFIX +672: lock + .pushsection .smp_locks,"a" + .balign 4 + .long 672b - . + .popsection + .endm +#else + .macro LOCK_PREFIX + .endm +#endif /* - * use this macro(s) if you need more than one output parameter - * in alternative_io + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). */ -#define ASM_OUTPUT2(a...) a +.macro altinstr_entry orig alt ft_flags orig_len alt_len + .long \orig - . + .long \alt - . + .4byte \ft_flags + .byte \orig_len + .byte \alt_len +.endm + +.macro ALT_CALL_INSTR + call BUG_func +.endm /* - * use this macro if you need clobbers but no inputs in - * alternative_{input,io,call}() + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".skip" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. */ -#define ASM_NO_INPUT_CLOBBER(clbr...) 
"i" (0) : clbr +#define __ALTERNATIVE(oldinst, newinst, flag) \ +740: \ + oldinst ; \ +741: \ + .skip -(((744f-743f)-(741b-740b)) > 0) * ((744f-743f)-(741b-740b)),0x90 ;\ +742: \ + .pushsection .altinstructions, "aM", @progbits, ALT_INSTR_SIZE ;\ + altinstr_entry 740b,743f,flag,742b-740b,744f-743f ; \ + .popsection ; \ + .pushsection .altinstr_replacement,"ax" ; \ +743: \ + ANNOTATE_DATA_SPECIAL ; \ + newinst ; \ +744: \ + .popsection ; + +.macro ALTERNATIVE oldinstr, newinstr, ft_flags + __ALTERNATIVE(\oldinstr, \newinstr, \ft_flags) +.endm -#endif /* __ASSEMBLY__ */ +/* + * Same as ALTERNATIVE macro above but for two alternatives. If CPU + * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has + * @feature2, it replaces @oldinstr with @feature2. + */ +.macro ALTERNATIVE_2 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2 + __ALTERNATIVE(__ALTERNATIVE(\oldinstr, \newinstr1, \ft_flags1), + \newinstr2, \ft_flags2) +.endm + +.macro ALTERNATIVE_3 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, newinstr3, ft_flags3 + __ALTERNATIVE(ALTERNATIVE_2(\oldinstr, \newinstr1, \ft_flags1, \newinstr2, \ft_flags2), + \newinstr3, \ft_flags3) +.endm + +/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */ +#define ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) \ + ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \ + newinstr_yes, ft_flags + +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/include/asm/amd/hsmp.h b/arch/x86/include/asm/amd/hsmp.h new file mode 100644 index 000000000000..2137f62853ed --- /dev/null +++ b/arch/x86/include/asm/amd/hsmp.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_X86_AMD_HSMP_H_ +#define _ASM_X86_AMD_HSMP_H_ + +#include <uapi/asm/amd_hsmp.h> + +#if IS_ENABLED(CONFIG_AMD_HSMP) +int hsmp_send_message(struct hsmp_message *msg); +#else +static inline int hsmp_send_message(struct hsmp_message *msg) +{ + return -ENODEV; +} +#endif + +#endif /*_ASM_X86_AMD_HSMP_H_*/ diff --git a/arch/x86/include/asm/amd/ibs.h b/arch/x86/include/asm/amd/ibs.h new file mode 100644 index 000000000000..3ee5903982c2 --- /dev/null +++ b/arch/x86/include/asm/amd/ibs.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AMD_IBS_H +#define _ASM_X86_AMD_IBS_H + +/* + * From PPR Vol 1 for AMD Family 19h Model 01h B1 + * 55898 Rev 0.35 - Feb 5, 2021 + */ + +#include <asm/msr-index.h> + +/* IBS_OP_DATA2 DataSrc */ +#define IBS_DATA_SRC_LOC_CACHE 2 +#define IBS_DATA_SRC_DRAM 3 +#define IBS_DATA_SRC_REM_CACHE 4 +#define IBS_DATA_SRC_IO 7 + +/* IBS_OP_DATA2 DataSrc Extension */ +#define IBS_DATA_SRC_EXT_LOC_CACHE 1 +#define IBS_DATA_SRC_EXT_NEAR_CCX_CACHE 2 +#define IBS_DATA_SRC_EXT_DRAM 3 +#define IBS_DATA_SRC_EXT_FAR_CCX_CACHE 5 +#define IBS_DATA_SRC_EXT_PMEM 6 +#define IBS_DATA_SRC_EXT_IO 7 +#define IBS_DATA_SRC_EXT_EXT_MEM 8 +#define IBS_DATA_SRC_EXT_PEER_AGENT_MEM 12 + +/* + * IBS Hardware MSRs + */ + +/* MSR 0xc0011030: IBS Fetch Control */ +union ibs_fetch_ctl { + __u64 val; + struct { + __u64 fetch_maxcnt:16,/* 0-15: instruction fetch max. 
count */ + fetch_cnt:16, /* 16-31: instruction fetch count */ + fetch_lat:16, /* 32-47: instruction fetch latency */ + fetch_en:1, /* 48: instruction fetch enable */ + fetch_val:1, /* 49: instruction fetch valid */ + fetch_comp:1, /* 50: instruction fetch complete */ + ic_miss:1, /* 51: i-cache miss */ + phy_addr_valid:1,/* 52: physical address valid */ + l1tlb_pgsz:2, /* 53-54: i-cache L1TLB page size + * (needs IbsPhyAddrValid) */ + l1tlb_miss:1, /* 55: i-cache fetch missed in L1TLB */ + l2tlb_miss:1, /* 56: i-cache fetch missed in L2TLB */ + rand_en:1, /* 57: random tagging enable */ + fetch_l2_miss:1,/* 58: L2 miss for sampled fetch + * (needs IbsFetchComp) */ + l3_miss_only:1, /* 59: Collect L3 miss samples only */ + fetch_oc_miss:1,/* 60: Op cache miss for the sampled fetch */ + fetch_l3_miss:1,/* 61: L3 cache miss for the sampled fetch */ + reserved:2; /* 62-63: reserved */ + }; +}; + +/* MSR 0xc0011033: IBS Execution Control */ +union ibs_op_ctl { + __u64 val; + struct { + __u64 opmaxcnt:16, /* 0-15: periodic op max. count */ + l3_miss_only:1, /* 16: Collect L3 miss samples only */ + op_en:1, /* 17: op sampling enable */ + op_val:1, /* 18: op sample valid */ + cnt_ctl:1, /* 19: periodic op counter control */ + opmaxcnt_ext:7, /* 20-26: upper 7 bits of periodic op maximum count */ + reserved0:5, /* 27-31: reserved */ + opcurcnt:27, /* 32-58: periodic op counter current count */ + ldlat_thrsh:4, /* 59-62: Load Latency threshold */ + ldlat_en:1; /* 63: Load Latency enabled */ + }; +}; + +/* MSR 0xc0011035: IBS Op Data 1 */ +union ibs_op_data { + __u64 val; + struct { + __u64 comp_to_ret_ctr:16, /* 0-15: op completion to retire count */ + tag_to_ret_ctr:16, /* 15-31: op tag to retire count */ + reserved1:2, /* 32-33: reserved */ + op_return:1, /* 34: return op */ + op_brn_taken:1, /* 35: taken branch op */ + op_brn_misp:1, /* 36: mispredicted branch op */ + op_brn_ret:1, /* 37: branch op retired */ + op_rip_invalid:1, /* 38: RIP is invalid */ + op_brn_fuse:1, /* 39: fused branch op */ + op_microcode:1, /* 40: microcode op */ + reserved2:23; /* 41-63: reserved */ + }; +}; + +/* MSR 0xc0011036: IBS Op Data 2 */ +union ibs_op_data2 { + __u64 val; + struct { + __u64 data_src_lo:3, /* 0-2: data source low */ + reserved0:1, /* 3: reserved */ + rmt_node:1, /* 4: destination node */ + cache_hit_st:1, /* 5: cache hit state */ + data_src_hi:2, /* 6-7: data source high */ + reserved1:56; /* 8-63: reserved */ + }; +}; + +/* MSR 0xc0011037: IBS Op Data 3 */ +union ibs_op_data3 { + __u64 val; + struct { + __u64 ld_op:1, /* 0: load op */ + st_op:1, /* 1: store op */ + dc_l1tlb_miss:1, /* 2: data cache L1TLB miss */ + dc_l2tlb_miss:1, /* 3: data cache L2TLB hit in 2M page */ + dc_l1tlb_hit_2m:1, /* 4: data cache L1TLB hit in 2M page */ + dc_l1tlb_hit_1g:1, /* 5: data cache L1TLB hit in 1G page */ + dc_l2tlb_hit_2m:1, /* 6: data cache L2TLB hit in 2M page */ + dc_miss:1, /* 7: data cache miss */ + dc_mis_acc:1, /* 8: misaligned access */ + reserved:4, /* 9-12: reserved */ + dc_wc_mem_acc:1, /* 13: write combining memory access */ + dc_uc_mem_acc:1, /* 14: uncacheable memory access */ + dc_locked_op:1, /* 15: locked operation */ + dc_miss_no_mab_alloc:1, /* 16: DC miss with no MAB allocated */ + dc_lin_addr_valid:1, /* 17: data cache linear address valid */ + dc_phy_addr_valid:1, /* 18: data cache physical address valid */ + dc_l2_tlb_hit_1g:1, /* 19: data cache L2 hit in 1GB page */ + l2_miss:1, /* 20: L2 cache miss */ + sw_pf:1, /* 21: software prefetch */ + op_mem_width:4, /* 22-25: load/store size 
in bytes */ + op_dc_miss_open_mem_reqs:6, /* 26-31: outstanding mem reqs on DC fill */ + dc_miss_lat:16, /* 32-47: data cache miss latency */ + tlb_refill_lat:16; /* 48-63: L1 TLB refill latency */ + }; +}; + +/* MSR 0xc001103c: IBS Fetch Control Extended */ +union ic_ibs_extd_ctl { + __u64 val; + struct { + __u64 itlb_refill_lat:16, /* 0-15: ITLB Refill latency for sampled fetch */ + reserved:48; /* 16-63: reserved */ + }; +}; + +/* + * IBS driver related + */ + +struct perf_ibs_data { + u32 size; + union { + u32 data[0]; /* data buffer starts here */ + u32 caps; + }; + u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; +}; + +#endif /* _ASM_X86_AMD_IBS_H */ diff --git a/arch/x86/include/asm/amd/nb.h b/arch/x86/include/asm/amd/nb.h new file mode 100644 index 000000000000..ddb5108cf46c --- /dev/null +++ b/arch/x86/include/asm/amd/nb.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AMD_NB_H +#define _ASM_X86_AMD_NB_H + +#include <linux/ioport.h> +#include <linux/pci.h> +#include <asm/amd/node.h> + +struct amd_nb_bus_dev_range { + u8 bus; + u8 dev_base; + u8 dev_limit; +}; + +extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; + +extern bool early_is_amd_nb(u32 value); +extern struct resource *amd_get_mmconfig_range(struct resource *res); +extern void amd_flush_garts(void); +extern int amd_numa_init(void); +extern int amd_get_subcaches(int); +extern int amd_set_subcaches(int, unsigned long); + +struct amd_l3_cache { + unsigned indices; + u8 subcaches[4]; +}; + +struct amd_northbridge { + struct pci_dev *misc; + struct pci_dev *link; + struct amd_l3_cache l3_cache; +}; + +struct amd_northbridge_info { + u16 num; + u64 flags; + struct amd_northbridge *nb; +}; + +#define AMD_NB_GART BIT(0) +#define AMD_NB_L3_INDEX_DISABLE BIT(1) +#define AMD_NB_L3_PARTITIONING BIT(2) + +#ifdef CONFIG_AMD_NB + +u16 amd_nb_num(void); +bool amd_nb_has_feature(unsigned int feature); +struct amd_northbridge *node_to_amd_nb(int node); + +static inline bool amd_gart_present(void) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return false; + + /* GART present only on Fam15h, up to model 0fh */ + if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || + (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10)) + return true; + + return false; +} + +#else + +#define amd_nb_num(x) 0 +#define amd_nb_has_feature(x) false +static inline struct amd_northbridge *node_to_amd_nb(int node) +{ + return NULL; +} +#define amd_gart_present(x) false + +#endif + + +#endif /* _ASM_X86_AMD_NB_H */ diff --git a/arch/x86/include/asm/amd/node.h b/arch/x86/include/asm/amd/node.h new file mode 100644 index 000000000000..a672b8765fa8 --- /dev/null +++ b/arch/x86/include/asm/amd/node.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD Node helper functions and common defines + * + * Copyright (c) 2024, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam <Yazen.Ghannam@amd.com> + * + * Note: + * Items in this file may only be used in a single place. + * However, it's prudent to keep all AMD Node functionality + * in a unified place rather than spreading throughout the + * kernel. 
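Back to the IBS accessor unions above (ibs_op_data3 and friends): the val/bitfield union lets the driver capture the raw MSR image once and then pick fields by name. A cut-down model of the decode pattern; the outer field widths follow the header, but the intermediate flag bits are collapsed into one filler field, and bitfield layout is compiler-defined (this matches GCC on little-endian x86):

    #include <stdio.h>

    union op_data3_view {
            unsigned long long val;
            struct {
                    unsigned long long ld_op:1,           /*  0: load op */
                                       st_op:1,           /*  1: store op */
                                       other_flags:30,    /*  2-31 */
                                       dc_miss_lat:16,    /* 32-47 */
                                       tlb_refill_lat:16; /* 48-63 */
            };
    };

    int main(void)
    {
            union op_data3_view d = { .val = (0x40ULL << 32) | 1 };

            printf("load=%llu, miss latency=%llu cycles\n",
                   (unsigned long long)d.ld_op,
                   (unsigned long long)d.dc_miss_lat);
            return 0;
    }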
+ */ + +#ifndef _ASM_X86_AMD_NODE_H_ +#define _ASM_X86_AMD_NODE_H_ + +#include <linux/pci.h> + +#define MAX_AMD_NUM_NODES 8 +#define AMD_NODE0_PCI_SLOT 0x18 + +struct pci_dev *amd_node_get_func(u16 node, u8 func); + +static inline u16 amd_num_nodes(void) +{ + return topology_amd_nodes_per_pkg() * topology_max_packages(); +} + +#ifdef CONFIG_AMD_NODE +int __must_check amd_smn_read(u16 node, u32 address, u32 *value); +int __must_check amd_smn_write(u16 node, u32 address, u32 value); + +/* Should only be used by the HSMP driver. */ +int __must_check amd_smn_hsmp_rdwr(u16 node, u32 address, u32 *value, bool write); +#else +static inline int __must_check amd_smn_read(u16 node, u32 address, u32 *value) { return -ENODEV; } +static inline int __must_check amd_smn_write(u16 node, u32 address, u32 value) { return -ENODEV; } + +static inline int __must_check amd_smn_hsmp_rdwr(u16 node, u32 address, u32 *value, bool write) +{ + return -ENODEV; +} +#endif /* CONFIG_AMD_NODE */ + +/* helper for use with read_poll_timeout */ +static inline int smn_read_register(u32 reg) +{ + int data, rc; + + rc = amd_smn_read(0, reg, &data); + if (rc) + return rc; + + return data; +} +#endif /*_ASM_X86_AMD_NODE_H_*/ diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h deleted file mode 100644 index 1ae4e5791afa..000000000000 --- a/arch/x86/include/asm/amd_nb.h +++ /dev/null @@ -1,127 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_AMD_NB_H -#define _ASM_X86_AMD_NB_H - -#include <linux/ioport.h> -#include <linux/pci.h> -#include <linux/refcount.h> - -struct amd_nb_bus_dev_range { - u8 bus; - u8 dev_base; - u8 dev_limit; -}; - -extern const struct pci_device_id amd_nb_misc_ids[]; -extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; - -extern bool early_is_amd_nb(u32 value); -extern struct resource *amd_get_mmconfig_range(struct resource *res); -extern int amd_cache_northbridges(void); -extern void amd_flush_garts(void); -extern int amd_numa_init(void); -extern int amd_get_subcaches(int); -extern int amd_set_subcaches(int, unsigned long); - -extern int amd_smn_read(u16 node, u32 address, u32 *value); -extern int amd_smn_write(u16 node, u32 address, u32 value); -extern int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo); - -struct amd_l3_cache { - unsigned indices; - u8 subcaches[4]; -}; - -struct threshold_block { - unsigned int block; /* Number within bank */ - unsigned int bank; /* MCA bank the block belongs to */ - unsigned int cpu; /* CPU which controls MCA bank */ - u32 address; /* MSR address for the block */ - u16 interrupt_enable; /* Enable/Disable APIC interrupt */ - bool interrupt_capable; /* Bank can generate an interrupt. */ - - u16 threshold_limit; /* - * Value upon which threshold - * interrupt is generated. - */ - - struct kobject kobj; /* sysfs object */ - struct list_head miscj; /* - * List of threshold blocks - * within a bank. 
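smn_read_register() above folds the error code and the register value into a single int so it can sit directly in read_poll_timeout()'s op slot, which also means it only suits registers that keep bit 31 clear. A hedged sketch of a caller; the register address and ready bit are invented for illustration:

    #include <linux/bits.h>
    #include <linux/iopoll.h>
    #include <asm/amd/node.h>

    #define FOO_SMN_REG   0x50a00       /* hypothetical SMN address */
    #define FOO_READY_BIT BIT(0)        /* hypothetical status bit */

    static int foo_wait_ready(void)
    {
            int val, ret;

            /* Poll every 100us, time out after 100ms. A negative val is
             * an amd_smn_read() failure and ends the poll early too. */
            ret = read_poll_timeout(smn_read_register, val,
                                    val < 0 || (val & FOO_READY_BIT),
                                    100, 100000, false, FOO_SMN_REG);
            if (ret)
                    return ret;             /* -ETIMEDOUT */

            return val < 0 ? val : 0;
    }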
- */ -}; - -struct threshold_bank { - struct kobject *kobj; - struct threshold_block *blocks; - - /* initialized to the number of CPUs on the node sharing this bank */ - refcount_t cpus; -}; - -struct amd_northbridge { - struct pci_dev *root; - struct pci_dev *misc; - struct pci_dev *link; - struct amd_l3_cache l3_cache; - struct threshold_bank *bank4; -}; - -struct amd_northbridge_info { - u16 num; - u64 flags; - struct amd_northbridge *nb; -}; - -#define AMD_NB_GART BIT(0) -#define AMD_NB_L3_INDEX_DISABLE BIT(1) -#define AMD_NB_L3_PARTITIONING BIT(2) - -#ifdef CONFIG_AMD_NB - -u16 amd_nb_num(void); -bool amd_nb_has_feature(unsigned int feature); -struct amd_northbridge *node_to_amd_nb(int node); - -static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev) -{ - struct pci_dev *misc; - int i; - - for (i = 0; i != amd_nb_num(); i++) { - misc = node_to_amd_nb(i)->misc; - - if (pci_domain_nr(misc->bus) == pci_domain_nr(pdev->bus) && - PCI_SLOT(misc->devfn) == PCI_SLOT(pdev->devfn)) - return i; - } - - WARN(1, "Unable to find AMD Northbridge id for %s\n", pci_name(pdev)); - return 0; -} - -static inline bool amd_gart_present(void) -{ - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) - return false; - - /* GART present only on Fam15h, upto model 0fh */ - if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || - (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10)) - return true; - - return false; -} - -#else - -#define amd_nb_num(x) 0 -#define amd_nb_has_feature(x) false -#define node_to_amd_nb(x) NULL -#define amd_gart_present(x) false - -#endif - - -#endif /* _ASM_X86_AMD_NB_H */ diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h deleted file mode 100644 index 0acbac299e49..000000000000 --- a/arch/x86/include/asm/apb_timer.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * apb_timer.h: Driver for Langwell APB timer based on Synopsis DesignWare - * - * (C) Copyright 2009 Intel Corporation - * Author: Jacob Pan (jacob.jun.pan@intel.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. 
- * - * Note: - */ - -#ifndef ASM_X86_APBT_H -#define ASM_X86_APBT_H -#include <linux/sfi.h> - -#ifdef CONFIG_APB_TIMER - -/* default memory mapped register base */ -#define LNW_SCU_ADDR 0xFF100000 -#define LNW_EXT_TIMER_OFFSET 0x1B800 -#define APBT_DEFAULT_BASE (LNW_SCU_ADDR+LNW_EXT_TIMER_OFFSET) -#define LNW_EXT_TIMER_PGOFFSET 0x800 - -/* APBT clock speed range from PCLK to fabric base, 25-100MHz */ -#define APBT_MAX_FREQ 50000000 -#define APBT_MIN_FREQ 1000000 -#define APBT_MMAP_SIZE 1024 - -#define APBT_DEV_USED 1 - -extern void apbt_time_init(void); -extern unsigned long apbt_quick_calibrate(void); -extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu); -extern void apbt_setup_secondary_clock(void); - -extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint); -extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr); -extern int sfi_mtimer_num; - -#else /* CONFIG_APB_TIMER */ - -static inline unsigned long apbt_quick_calibrate(void) {return 0; } -static inline void apbt_time_init(void) { } - -#endif -#endif /* ASM_X86_APBT_H */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 130e81e10fc7..a26e66d66444 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -1,7 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef _ASM_X86_APIC_H #define _ASM_X86_APIC_H #include <linux/cpumask.h> +#include <linux/static_call.h> #include <asm/alternative.h> #include <asm/cpufeature.h> @@ -11,9 +13,16 @@ #include <asm/mpspec.h> #include <asm/msr.h> #include <asm/hardirq.h> +#include <asm/io.h> +#include <asm/posted_intr.h> #define ARCH_APICTIMER_STOPS_ON_C3 1 +/* Macros for apic_extnmi which controls external NMI masking */ +#define APIC_EXTNMI_BSP 0 /* Default */ +#define APIC_EXTNMI_ALL 1 +#define APIC_EXTNMI_NONE 2 + /* * Debugging macros */ @@ -21,38 +30,40 @@ #define APIC_VERBOSE 1 #define APIC_DEBUG 2 -/* Macros for apic_extnmi which controls external NMI masking */ -#define APIC_EXTNMI_BSP 0 /* Default */ -#define APIC_EXTNMI_ALL 1 -#define APIC_EXTNMI_NONE 2 - /* - * Define the default level of output to be very little - * This can be turned up by using apic=verbose for more - * information and apic=debug for _lots_ of information. - * apic_verbosity is defined in apic.c + * Define the default level of output to be very little This can be turned + * up by using apic=verbose for more information and apic=debug for _lots_ + * of information. apic_verbosity is defined in apic.c */ -#define apic_printk(v, s, a...) do { \ - if ((v) <= apic_verbosity) \ - printk(s, ##a); \ - } while (0) - +#define apic_printk(v, s, a...) \ +do { \ + if ((v) <= apic_verbosity) \ + printk(s, ##a); \ +} while (0) + +#define apic_pr_verbose(s, a...) apic_printk(APIC_VERBOSE, KERN_INFO s, ##a) +#define apic_pr_debug(s, a...) apic_printk(APIC_DEBUG, KERN_DEBUG s, ##a) +#define apic_pr_debug_cont(s, a...) apic_printk(APIC_DEBUG, KERN_CONT s, ##a) +/* Unconditional debug prints for code which is guarded by apic_verbosity already */ +#define apic_dbg(s, a...) 
printk(KERN_DEBUG s, ##a) #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) -extern void generic_apic_probe(void); +extern void x86_32_probe_apic(void); #else -static inline void generic_apic_probe(void) -{ -} +static inline void x86_32_probe_apic(void) { } #endif +extern u32 cpuid_to_apicid[]; + +#define CPU_ACPIID_INVALID U32_MAX + #ifdef CONFIG_X86_LOCAL_APIC -extern unsigned int apic_verbosity; +extern int apic_verbosity; extern int local_apic_timer_c2_ok; -extern int disable_apic; -extern unsigned int lapic_timer_frequency; +extern bool apic_is_disabled; +extern unsigned int lapic_timer_period; extern enum apic_intr_mode_id apic_intr_mode; enum apic_intr_mode_id { @@ -63,20 +74,6 @@ enum apic_intr_mode_id { APIC_SYMMETRIC_IO_NO_ROUTING }; -#ifdef CONFIG_SMP -extern void __inquire_remote_apic(int apicid); -#else /* CONFIG_SMP */ -static inline void __inquire_remote_apic(int apicid) -{ -} -#endif /* CONFIG_SMP */ - -static inline void default_inquire_remote_apic(int apicid) -{ - if (apic_verbosity >= APIC_DEBUG) - __inquire_remote_apic(apicid); -} - /* * With 82489DX we can't rely on apic feature bit * retrieved via cpuid but still have to deal with @@ -87,7 +84,7 @@ static inline void default_inquire_remote_apic(int apicid) */ static inline bool apic_from_smp_config(void) { - return smp_found_config && !disable_apic; + return smp_found_config && !apic_is_disabled; } /* @@ -97,24 +94,25 @@ static inline bool apic_from_smp_config(void) #include <asm/paravirt.h> #endif -extern int setup_profiling_timer(unsigned int); - static inline void native_apic_mem_write(u32 reg, u32 v) { volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); - alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP, - ASM_OUTPUT2("=r" (v), "=m" (*addr)), - ASM_OUTPUT2("0" (v), "m" (*addr))); + alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP, + ASM_OUTPUT("=r" (v), "=m" (*addr)), + ASM_INPUT("0" (v), "m" (*addr))); } static inline u32 native_apic_mem_read(u32 reg) { - return *((volatile u32 *)(APIC_BASE + reg)); + return readl((void __iomem *)(APIC_BASE + reg)); +} + +static inline void native_apic_mem_eoi(void) +{ + native_apic_mem_write(APIC_EOI, APIC_EOI_ACK); } -extern void native_apic_wait_icr_idle(void); -extern u32 native_safe_apic_wait_icr_idle(void); extern void native_apic_icr_write(u32 low, u32 id); extern u64 native_apic_icr_read(void); @@ -122,22 +120,22 @@ static inline bool apic_is_x2apic_enabled(void) { u64 msr; - if (rdmsrl_safe(MSR_IA32_APICBASE, &msr)) + if (rdmsrq_safe(MSR_IA32_APICBASE, &msr)) return false; return msr & X2APIC_ENABLE; } extern void enable_IR_x2apic(void); -extern int get_physical_broadcast(void); - extern int lapic_get_maxlvt(void); extern void clear_local_APIC(void); extern void disconnect_bsp_APIC(int virt_wire_setup); extern void disable_local_APIC(void); +extern void apic_soft_disable(void); extern void lapic_shutdown(void); extern void sync_Arb_IDs(void); extern void init_bsp_APIC(void); +extern void apic_intr_mode_select(void); extern void apic_intr_mode_init(void); extern void init_apic_mappings(void); void register_lapic_address(unsigned long address); @@ -146,15 +144,14 @@ extern void setup_secondary_APIC_clock(void); extern void lapic_update_tsc_freq(void); #ifdef CONFIG_X86_64 -static inline int apic_force_enable(unsigned long addr) +static inline bool apic_force_enable(unsigned long addr) { - return -1; + return false; } #else -extern int apic_force_enable(unsigned long addr); +extern bool apic_force_enable(unsigned long addr); #endif 
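apic_is_x2apic_enabled() above is a one-bit test on IA32_APICBASE (MSR 0x1b): bit 11 globally enables the APIC, bit 10 (X2APIC_ENABLE) selects x2APIC mode, and the MMIO base lives in the upper bits. A small decode sketch over a sample raw value (actually reading MSR 0x1b needs ring 0 or /dev/cpu/0/msr):

    #include <stdint.h>
    #include <stdio.h>

    #define XAPIC_ENABLE  (1ULL << 11)     /* IA32_APICBASE.EN */
    #define X2APIC_ENABLE (1ULL << 10)     /* IA32_APICBASE.EXTD */

    int main(void)
    {
            uint64_t msr = 0xfee00d00ULL;  /* sample: EN, EXTD and BSP set */

            printf("xAPIC enabled: %d, x2APIC mode: %d, base: %#llx\n",
                   !!(msr & XAPIC_ENABLE), !!(msr & X2APIC_ENABLE),
                   (unsigned long long)(msr & ~0xfffULL));
            return 0;
    }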
-extern void apic_bsp_setup(bool upmode); extern void apic_ap_setup(void); /* @@ -172,8 +169,20 @@ static inline int apic_is_clustered_box(void) extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); extern void lapic_assign_system_vectors(void); extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); +extern void lapic_update_legacy_vectors(void); extern void lapic_online(void); extern void lapic_offline(void); +extern bool apic_needs_pit(void); + +extern void apic_send_IPI_allbutself(unsigned int vector); + +extern void topology_register_apic(u32 apic_id, u32 acpi_id, bool present); +extern void topology_register_boot_apic(u32 apic_id); +extern int topology_hotplug_apic(u32 apic_id, u32 acpi_id); +extern void topology_hotunplug_apic(unsigned int cpu); +extern void topology_apply_cmdline_limits_early(void); +extern void topology_init_possible_cpus(void); +extern void topology_reset_possible_cpus_up(void); #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } @@ -184,34 +193,28 @@ static inline void disable_local_APIC(void) { } # define setup_secondary_APIC_clock x86_init_noop static inline void lapic_update_tsc_freq(void) { } static inline void init_bsp_APIC(void) { } +static inline void apic_intr_mode_select(void) { } static inline void apic_intr_mode_init(void) { } static inline void lapic_assign_system_vectors(void) { } static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } +static inline bool apic_needs_pit(void) { return true; } +static inline void topology_apply_cmdline_limits_early(void) { } +static inline void topology_init_possible_cpus(void) { } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_X2APIC -/* - * Make previous memory operations globally visible before - * sending the IPI through x2apic wrmsr. We need a serializing instruction or - * mfence for this. 
- */ -static inline void x2apic_wrmsr_fence(void) -{ - asm volatile("mfence" : : : "memory"); -} - static inline void native_apic_msr_write(u32 reg, u32 v) { if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || reg == APIC_LVR) return; - wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0); + wrmsrq(APIC_BASE_MSR + (reg >> 4), v); } -static inline void native_apic_msr_eoi_write(u32 reg, u32 v) +static inline void native_apic_msr_eoi(void) { - __wrmsr(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0); + native_wrmsrq(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK); } static inline u32 native_apic_msr_read(u32 reg) @@ -221,38 +224,26 @@ static inline u32 native_apic_msr_read(u32 reg) if (reg == APIC_DFR) return -1; - rdmsrl(APIC_BASE_MSR + (reg >> 4), msr); + rdmsrq(APIC_BASE_MSR + (reg >> 4), msr); return (u32)msr; } -static inline void native_x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return; -} - -static inline u32 native_safe_x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return 0; -} - static inline void native_x2apic_icr_write(u32 low, u32 id) { - wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); + wrmsrq(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); } static inline u64 native_x2apic_icr_read(void) { unsigned long val; - rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); + rdmsrq(APIC_BASE_MSR + (APIC_ICR >> 4), val); return val; } extern int x2apic_mode; extern int x2apic_phys; -extern void __init check_x2apic(void); +extern void __init x2apic_set_max_apicid(u32 apicid); extern void x2apic_setup(void); static inline int x2apic_enabled(void) { @@ -261,19 +252,18 @@ static inline int x2apic_enabled(void) #define x2apic_supported() (boot_cpu_has(X86_FEATURE_X2APIC)) #else /* !CONFIG_X86_X2APIC */ -static inline void check_x2apic(void) { } static inline void x2apic_setup(void) { } static inline int x2apic_enabled(void) { return 0; } - +static inline u32 native_apic_msr_read(u32 reg) { BUG(); } #define x2apic_mode (0) #define x2apic_supported() (0) #endif /* !CONFIG_X86_X2APIC */ +extern void __init check_x2apic(void); struct irq_data; /* * Copyright 2004 James Cleverdon, IBM. - * Subject to the GNU Public License, v.2 * * Generic APIC sub-arch data struct. * @@ -283,8 +273,8 @@ struct irq_data; */ struct apic { /* Hotpath functions first */ - void (*eoi_write)(u32 reg, u32 v); - void (*native_eoi_write)(u32 reg, u32 v); + void (*eoi)(void); + void (*native_eoi)(void); void (*write)(u32 reg, u32 v); u32 (*read)(u32 reg); @@ -299,11 +289,10 @@ struct apic { void (*send_IPI_all)(int vector); void (*send_IPI_self)(int vector); - /* dest_logical is used by the IPI functions */ - u32 dest_logical; - u32 disable_esr; - u32 irq_delivery_mode; - u32 irq_dest_mode; + u32 disable_esr : 1, + dest_mode_logical : 1, + x2apic_set_max_apicid : 1, + nmi_to_offline_cpu : 1; u32 (*calc_dest_apicid)(unsigned int cpu); @@ -311,45 +300,47 @@ struct apic { u64 (*icr_read)(void); void (*icr_write)(u32 low, u32 high); + /* The limit of the APIC ID space. 
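The reg >> 4 in the native_apic_msr_*() helpers above is the entire MMIO-to-MSR translation: xAPIC registers sit on 16-byte boundaries in the MMIO page, and x2APIC assigns one MSR per register starting at APIC_BASE_MSR (0x800). Checking the arithmetic on two well-known registers:

    #include <stdio.h>

    #define APIC_BASE_MSR 0x800
    #define APIC_EOI      0xB0     /* xAPIC MMIO offset */
    #define APIC_ICR      0x300

    int main(void)
    {
            /* 0xb0 >> 4 = 0xb -> MSR 0x80b; 0x300 >> 4 = 0x30 -> MSR 0x830,
             * which is why the x2APIC ICR is a single 64-bit write. */
            printf("EOI MSR: %#x\n", APIC_BASE_MSR + (APIC_EOI >> 4));
            printf("ICR MSR: %#x\n", APIC_BASE_MSR + (APIC_ICR >> 4));
            return 0;
    }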
*/ + u32 max_apic_id; + /* Probe, setup and smpboot functions */ int (*probe)(void); + void (*setup)(void); + void (*teardown)(void); int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - int (*apic_id_valid)(u32 apicid); - int (*apic_id_registered)(void); - bool (*check_apicid_used)(physid_mask_t *map, int apicid); void (*init_apic_ldr)(void); - void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); - void (*setup_apic_routing)(void); - int (*cpu_present_to_apicid)(int mps_cpu); - void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); - int (*check_phys_apicid_present)(int phys_apicid); - int (*phys_pkg_id)(int cpuid_apic, int index_msb); + u32 (*cpu_present_to_apicid)(int mps_cpu); - u32 (*get_apic_id)(unsigned long x); - u32 (*set_apic_id)(unsigned int id); + u32 (*get_apic_id)(u32 id); /* wakeup_secondary_cpu */ - int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); - - void (*inquire_remote_apic)(int apicid); - -#ifdef CONFIG_X86_32 - /* - * Called very early during boot from get_smp_config(). It should - * return the logical apicid. x86_[bios]_cpu_to_apicid is - * initialized before this function is called. - * - * If logical apicid can't be determined that early, the function - * may return BAD_APICID. Logical apicid will be configured after - * init_apic_ldr() while bringing up CPUs. Note that NUMA affinity - * won't be applied properly during early boot in this case. - */ - int (*x86_32_early_logical_apicid)(int cpu); -#endif + int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_eip, unsigned int cpu); + /* wakeup secondary CPU using 64-bit wakeup point */ + int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip, unsigned int cpu); + + void (*update_vector)(unsigned int cpu, unsigned int vector, bool set); + char *name; }; +struct apic_override { + void (*eoi)(void); + void (*native_eoi)(void); + void (*write)(u32 reg, u32 v); + u32 (*read)(u32 reg); + void (*send_IPI)(int cpu, int vector); + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *msk, int vec); + void (*send_IPI_allbutself)(int vector); + void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); + u64 (*icr_read)(void); + void (*icr_write)(u32 low, u32 high); + int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_eip, unsigned int cpu); + int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip, unsigned int cpu); +}; + /* * Pointer to the local APIC driver in use on this system (there's * always just one such driver in use - the kernel decides via an @@ -361,19 +352,11 @@ extern struct apic *apic; * APIC drivers are probed based on how they are listed in the .apicdrivers * section. So the order is important and enforced by the ordering * of different apic driver files in the Makefile. - * - * For the files having two apic drivers, we use apic_drivers() - * to enforce the order with in them. 
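To make the ordering rule concrete, here is a sketch of how the section is consumed (an assumption, loosely modeled on the probe loops under arch/x86/kernel/apic/ and relying on the struct apic definition above, not code from this patch): the pointers are walked in link order and the first driver whose ->probe() accepts is installed, so Makefile ordering decides ties:

extern struct apic *__apicdrivers[], *__apicdrivers_end[];

static void probe_apic_driver(void)
{
        struct apic **drv;

        for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
                if ((*drv)->probe && (*drv)->probe()) {
                        apic_install_driver(*drv);      /* declared below */
                        break;
                }
        }
}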
*/ #define apic_driver(sym) \ static const struct apic *__apicdrivers_##sym __used \ __aligned(sizeof(struct apic *)) \ - __section(.apicdrivers) = { &sym } - -#define apic_drivers(sym1, sym2) \ - static struct apic *__apicdrivers_##sym1##sym2[2] __used \ - __aligned(sizeof(struct apic *)) \ - __section(.apicdrivers) = { &sym1, &sym2 } + __section(".apicdrivers") = { &sym } extern struct apic *__apicdrivers[], *__apicdrivers_end[]; @@ -381,48 +364,121 @@ extern struct apic *__apicdrivers[], *__apicdrivers_end[]; * APIC functionality to boot other CPUs - only used on SMP: */ #ifdef CONFIG_SMP -extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); extern int lapic_can_unplug_cpu(void); #endif #ifdef CONFIG_X86_LOCAL_APIC +extern struct apic_override __x86_apic_override; + +void __init apic_setup_apic_calls(void); +void __init apic_install_driver(struct apic *driver); + +#define apic_update_callback(_callback, _fn) { \ + __x86_apic_override._callback = _fn; \ + apic->_callback = _fn; \ + static_call_update(apic_call_##_callback, _fn); \ + pr_info("APIC: %s() replaced with %ps()\n", #_callback, _fn); \ +} + +#define DECLARE_APIC_CALL(__cb) \ + DECLARE_STATIC_CALL(apic_call_##__cb, *apic->__cb) + +DECLARE_APIC_CALL(eoi); +DECLARE_APIC_CALL(native_eoi); +DECLARE_APIC_CALL(icr_read); +DECLARE_APIC_CALL(icr_write); +DECLARE_APIC_CALL(read); +DECLARE_APIC_CALL(send_IPI); +DECLARE_APIC_CALL(send_IPI_mask); +DECLARE_APIC_CALL(send_IPI_mask_allbutself); +DECLARE_APIC_CALL(send_IPI_allbutself); +DECLARE_APIC_CALL(send_IPI_all); +DECLARE_APIC_CALL(send_IPI_self); +DECLARE_APIC_CALL(wait_icr_idle); +DECLARE_APIC_CALL(wakeup_secondary_cpu); +DECLARE_APIC_CALL(wakeup_secondary_cpu_64); +DECLARE_APIC_CALL(write); + +static __always_inline u32 apic_read(u32 reg) +{ + return static_call(apic_call_read)(reg); +} + +static __always_inline void apic_write(u32 reg, u32 val) +{ + static_call(apic_call_write)(reg, val); +} + +static __always_inline void apic_eoi(void) +{ + static_call(apic_call_eoi)(); +} + +static __always_inline void apic_native_eoi(void) +{ + static_call(apic_call_native_eoi)(); +} + +static __always_inline u64 apic_icr_read(void) +{ + return static_call(apic_call_icr_read)(); +} + +static __always_inline void apic_icr_write(u32 low, u32 high) +{ + static_call(apic_call_icr_write)(low, high); +} + +static __always_inline void __apic_send_IPI(int cpu, int vector) +{ + static_call(apic_call_send_IPI)(cpu, vector); +} -static inline u32 apic_read(u32 reg) +static __always_inline void __apic_send_IPI_mask(const struct cpumask *mask, int vector) { - return apic->read(reg); + static_call_mod(apic_call_send_IPI_mask)(mask, vector); } -static inline void apic_write(u32 reg, u32 val) +static __always_inline void __apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { - apic->write(reg, val); + static_call(apic_call_send_IPI_mask_allbutself)(mask, vector); } -static inline void apic_eoi(void) +static __always_inline void __apic_send_IPI_allbutself(int vector) { - apic->eoi_write(APIC_EOI, APIC_EOI_ACK); + static_call(apic_call_send_IPI_allbutself)(vector); } -static inline u64 apic_icr_read(void) +static __always_inline void __apic_send_IPI_all(int vector) { - return apic->icr_read(); + static_call(apic_call_send_IPI_all)(vector); } -static inline void apic_icr_write(u32 low, u32 high) +static __always_inline void __apic_send_IPI_self(int vector) { - apic->icr_write(low, high); + static_call_mod(apic_call_send_IPI_self)(vector); } -static inline void 
apic_wait_icr_idle(void) +static __always_inline void apic_wait_icr_idle(void) { - apic->wait_icr_idle(); + static_call_cond(apic_call_wait_icr_idle)(); } -static inline u32 safe_apic_wait_icr_idle(void) +static __always_inline u32 safe_apic_wait_icr_idle(void) { - return apic->safe_wait_icr_idle(); + return apic->safe_wait_icr_idle ? apic->safe_wait_icr_idle() : 0; } -extern void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)); +static __always_inline bool apic_id_valid(u32 apic_id) +{ + return apic_id <= apic->max_apic_id; +} + +static __always_inline void apic_update_vector(unsigned int cpu, unsigned int vector, bool set) +{ + if (apic->update_vector) + apic->update_vector(cpu, vector, set); +} #else /* CONFIG_X86_LOCAL_APIC */ @@ -433,29 +489,88 @@ static inline u64 apic_icr_read(void) { return 0; } static inline void apic_icr_write(u32 low, u32 high) { } static inline void apic_wait_icr_idle(void) { } static inline u32 safe_apic_wait_icr_idle(void) { return 0; } -static inline void apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) {} +static inline void apic_native_eoi(void) { WARN_ON_ONCE(1); } +static inline void apic_setup_apic_calls(void) { } +static inline void apic_update_vector(unsigned int cpu, unsigned int vector, bool set) { } + +#define apic_update_callback(_callback, _fn) do { } while (0) #endif /* CONFIG_X86_LOCAL_APIC */ extern void apic_ack_irq(struct irq_data *data); -static inline void ack_APIC_irq(void) +#define APIC_VECTOR_TO_BIT_NUMBER(v) ((unsigned int)(v) % 32) +#define APIC_VECTOR_TO_REG_OFFSET(v) ((unsigned int)(v) / 32 * 0x10) + +static inline bool lapic_vector_set_in_irr(unsigned int vector) +{ + u32 irr = apic_read(APIC_IRR + APIC_VECTOR_TO_REG_OFFSET(vector)); + + return !!(irr & (1U << APIC_VECTOR_TO_BIT_NUMBER(vector))); +} + +static inline bool is_vector_pending(unsigned int vector) +{ + return lapic_vector_set_in_irr(vector) || pi_pending_this_cpu(vector); +} + +#define MAX_APIC_VECTOR 256 +#define APIC_VECTORS_PER_REG 32 + +/* + * Vector states are maintained by APIC in 32-bit registers that are + * 16 bytes aligned. The status of each vector is kept in a single + * bit. + */ +static inline int apic_find_highest_vector(void *bitmap) +{ + int vec; + u32 *reg; + + for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG; vec >= 0; vec -= APIC_VECTORS_PER_REG) { + reg = bitmap + APIC_VECTOR_TO_REG_OFFSET(vec); + if (*reg) + return __fls(*reg) + vec; + } + + return -1; +} + +static inline u32 apic_get_reg(void *regs, int reg) +{ + return *((u32 *) (regs + reg)); +} + +static inline void apic_set_reg(void *regs, int reg, u32 val) +{ + *((u32 *) (regs + reg)) = val; +} + +static __always_inline u64 apic_get_reg64(void *regs, int reg) +{ + BUILD_BUG_ON(reg != APIC_ICR); + return *((u64 *) (regs + reg)); +} + +static __always_inline void apic_set_reg64(void *regs, int reg, u64 val) +{ + BUILD_BUG_ON(reg != APIC_ICR); + *((u64 *) (regs + reg)) = val; +} + +static inline void apic_clear_vector(int vec, void *bitmap) { - /* - * ack_APIC_irq() actually gets compiled as a single instruction - * ... yummie. 
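A quick illustration of the IRR indexing above: 256 vectors are spread over eight 32-bit registers spaced 16 bytes apart, so vector v lives at register offset (v / 32) * 0x10, bit (v % 32). A standalone check using the same macro values:

#include <assert.h>

#define APIC_VECTOR_TO_BIT_NUMBER(v)    ((unsigned int)(v) % 32)
#define APIC_VECTOR_TO_REG_OFFSET(v)    ((unsigned int)(v) / 32 * 0x10)

int main(void)
{
        assert(APIC_VECTOR_TO_REG_OFFSET(0x80) == 0x40);        /* fifth register */
        assert(APIC_VECTOR_TO_BIT_NUMBER(0x80) == 0);
        assert(APIC_VECTOR_TO_REG_OFFSET(0xFF) == 0x70);        /* last register */
        assert(APIC_VECTOR_TO_BIT_NUMBER(0xFF) == 31);          /* top bit */
        return 0;
}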
- */ - apic_eoi(); + clear_bit(APIC_VECTOR_TO_BIT_NUMBER(vec), bitmap + APIC_VECTOR_TO_REG_OFFSET(vec)); } -static inline unsigned default_get_apic_id(unsigned long x) +static inline void apic_set_vector(int vec, void *bitmap) { - unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); + set_bit(APIC_VECTOR_TO_BIT_NUMBER(vec), bitmap + APIC_VECTOR_TO_REG_OFFSET(vec)); +} - if (APIC_XAPIC(ver) || boot_cpu_has(X86_FEATURE_EXTD_APICID)) - return (x >> 24) & 0xFF; - else - return (x >> 24) & 0x0F; +static inline int apic_test_vector(int vec, void *bitmap) +{ + return test_bit(APIC_VECTOR_TO_BIT_NUMBER(vec), bitmap + APIC_VECTOR_TO_REG_OFFSET(vec)); } /* @@ -464,83 +579,52 @@ static inline unsigned default_get_apic_id(unsigned long x) #define TRAMPOLINE_PHYS_LOW 0x467 #define TRAMPOLINE_PHYS_HIGH 0x469 -#ifdef CONFIG_X86_64 -extern void apic_send_IPI_self(int vector); - -DECLARE_PER_CPU(int, x2apic_extra_bits); -#endif - -extern void generic_bigsmp_probe(void); - #ifdef CONFIG_X86_LOCAL_APIC #include <asm/smp.h> -#define APIC_DFR_VALUE (APIC_DFR_FLAT) - -DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); - extern struct apic apic_noop; -static inline unsigned int read_apic_id(void) +static inline u32 read_apic_id(void) { - unsigned int reg = apic_read(APIC_ID); + u32 reg = apic_read(APIC_ID); return apic->get_apic_id(reg); } -extern int default_apic_id_valid(u32 apicid); +#ifdef CONFIG_X86_64 +typedef int (*wakeup_cpu_handler)(int apicid, unsigned long start_eip); extern int default_acpi_madt_oem_check(char *, char *); -extern void default_setup_apic_routing(void); +extern void x86_64_probe_apic(void); +#else +static inline int default_acpi_madt_oem_check(char *a, char *b) { return 0; } +static inline void x86_64_probe_apic(void) { } +#endif extern u32 apic_default_calc_apicid(unsigned int cpu); extern u32 apic_flat_calc_apicid(unsigned int cpu); -extern bool default_check_apicid_used(physid_mask_t *map, int apicid); -extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap); -extern int default_cpu_present_to_apicid(int mps_cpu); -extern int default_check_phys_apicid_present(int phys_apicid); +extern u32 default_cpu_present_to_apicid(int mps_cpu); -#endif /* CONFIG_X86_LOCAL_APIC */ +void apic_send_nmi_to_offline_cpu(unsigned int cpu); -#ifdef CONFIG_SMP -bool apic_id_is_primary_thread(unsigned int id); -#else -static inline bool apic_id_is_primary_thread(unsigned int id) { return false; } -#endif - -extern void irq_enter(void); -extern void irq_exit(void); +#else /* CONFIG_X86_LOCAL_APIC */ -static inline void entering_irq(void) -{ - irq_enter(); - kvm_set_cpu_l1tf_flush_l1d(); -} +static inline u32 read_apic_id(void) { return 0; } -static inline void entering_ack_irq(void) -{ - entering_irq(); - ack_APIC_irq(); -} +#endif /* !CONFIG_X86_LOCAL_APIC */ -static inline void ipi_entering_ack_irq(void) -{ - irq_enter(); - ack_APIC_irq(); - kvm_set_cpu_l1tf_flush_l1d(); -} +#ifdef CONFIG_SMP +void apic_smt_update(void); +#else +static inline void apic_smt_update(void) { } +#endif -static inline void exiting_irq(void) -{ - irq_exit(); -} +struct msi_msg; +struct irq_cfg; -static inline void exiting_ack_irq(void) -{ - ack_APIC_irq(); - irq_exit(); -} +extern void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, + bool dmar); extern void ioapic_zap_locks(void); diff --git a/arch/x86/include/asm/apic_flat_64.h b/arch/x86/include/asm/apic_flat_64.h deleted file mode 100644 index d3a2b3876ce6..000000000000 --- a/arch/x86/include/asm/apic_flat_64.h 
+++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_APIC_FLAT_64_H -#define _ASM_X86_APIC_FLAT_64_H - -extern void flat_init_apic_ldr(void); - -#endif - diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 05e694ed8386..be39a543fbe5 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_APICDEF_H #define _ASM_X86_APICDEF_H +#include <linux/bits.h> + /* * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) * @@ -18,6 +20,13 @@ */ #define IO_APIC_SLOT_SIZE 1024 +#define APIC_DELIVERY_MODE_FIXED 0 +#define APIC_DELIVERY_MODE_LOWESTPRIO 1 +#define APIC_DELIVERY_MODE_SMI 2 +#define APIC_DELIVERY_MODE_NMI 4 +#define APIC_DELIVERY_MODE_INIT 5 +#define APIC_DELIVERY_MODE_EXTINT 7 + #define APIC_ID 0x20 #define APIC_LVR 0x30 @@ -89,18 +98,12 @@ #define APIC_DM_EXTINT 0x00700 #define APIC_VECTOR_MASK 0x000FF #define APIC_ICR2 0x310 -#define GET_APIC_DEST_FIELD(x) (((x) >> 24) & 0xFF) -#define SET_APIC_DEST_FIELD(x) ((x) << 24) +#define GET_XAPIC_DEST_FIELD(x) (((x) >> 24) & 0xFF) +#define SET_XAPIC_DEST_FIELD(x) ((x) << 24) #define APIC_LVTT 0x320 #define APIC_LVTTHMR 0x330 #define APIC_LVTPC 0x340 #define APIC_LVT0 0x350 -#define APIC_LVT_TIMER_BASE_MASK (0x3 << 18) -#define GET_APIC_TIMER_BASE(x) (((x) >> 18) & 0x3) -#define SET_APIC_TIMER_BASE(x) (((x) << 18)) -#define APIC_TIMER_BASE_CLKIN 0x0 -#define APIC_TIMER_BASE_TMBASE 0x1 -#define APIC_TIMER_BASE_DIV 0x2 #define APIC_LVT_TIMER_ONESHOT (0 << 17) #define APIC_LVT_TIMER_PERIODIC (1 << 17) #define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) @@ -132,6 +135,8 @@ #define APIC_TDR_DIV_128 0xA #define APIC_EFEAT 0x400 #define APIC_ECTRL 0x410 +#define APIC_SEOI 0x420 +#define APIC_IER 0x480 #define APIC_EILVTn(n) (0x500 + 0x10 * n) #define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ #define APIC_EILVT_NR_AMD_10H 4 @@ -144,9 +149,10 @@ #define APIC_EILVT_MASKED (1 << 16) #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) -#define APIC_BASE_MSR 0x800 -#define XAPIC_ENABLE (1UL << 11) -#define X2APIC_ENABLE (1UL << 10) +#define APIC_BASE_MSR 0x800 +#define APIC_X2APIC_ID_MSR 0x802 +#define XAPIC_ENABLE BIT(11) +#define X2APIC_ENABLE BIT(10) #ifdef CONFIG_X86_32 # define MAX_IO_APICS 64 @@ -168,279 +174,10 @@ #define APIC_CPUID(apicid) ((apicid) & XAPIC_DEST_CPUS_MASK) #define NUM_APIC_CLUSTERS ((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT) -/* - * the local APIC register structure, memory mapped. Not terribly well - * tested, but we might eventually use this one in the future - the - * problem why we cannot use it right now is the P5 APIC, it has an - * errata which cannot take 8-bit reads and writes, only 32-bit ones ... 
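For reference, the two enable bits converted to BIT() above live in the IA32_APIC_BASE MSR, and x2APIC mode is only architecturally valid with both bits set (EXTD without EN is a reserved combination). A small decoding sketch under that assumption:

#include <stdint.h>
#include <stdbool.h>

#define BIT(n)          (1UL << (n))
#define XAPIC_ENABLE    BIT(11)         /* IA32_APIC_BASE.EN */
#define X2APIC_ENABLE   BIT(10)         /* IA32_APIC_BASE.EXTD */

static bool apic_base_is_x2apic(uint64_t apic_base)
{
        return (apic_base & (XAPIC_ENABLE | X2APIC_ENABLE)) ==
               (XAPIC_ENABLE | X2APIC_ENABLE);
}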
- */ -#define u32 unsigned int - -struct local_apic { - -/*000*/ struct { u32 __reserved[4]; } __reserved_01; - -/*010*/ struct { u32 __reserved[4]; } __reserved_02; - -/*020*/ struct { /* APIC ID Register */ - u32 __reserved_1 : 24, - phys_apic_id : 4, - __reserved_2 : 4; - u32 __reserved[3]; - } id; - -/*030*/ const - struct { /* APIC Version Register */ - u32 version : 8, - __reserved_1 : 8, - max_lvt : 8, - __reserved_2 : 8; - u32 __reserved[3]; - } version; - -/*040*/ struct { u32 __reserved[4]; } __reserved_03; - -/*050*/ struct { u32 __reserved[4]; } __reserved_04; - -/*060*/ struct { u32 __reserved[4]; } __reserved_05; - -/*070*/ struct { u32 __reserved[4]; } __reserved_06; - -/*080*/ struct { /* Task Priority Register */ - u32 priority : 8, - __reserved_1 : 24; - u32 __reserved_2[3]; - } tpr; - -/*090*/ const - struct { /* Arbitration Priority Register */ - u32 priority : 8, - __reserved_1 : 24; - u32 __reserved_2[3]; - } apr; - -/*0A0*/ const - struct { /* Processor Priority Register */ - u32 priority : 8, - __reserved_1 : 24; - u32 __reserved_2[3]; - } ppr; - -/*0B0*/ struct { /* End Of Interrupt Register */ - u32 eoi; - u32 __reserved[3]; - } eoi; - -/*0C0*/ struct { u32 __reserved[4]; } __reserved_07; - -/*0D0*/ struct { /* Logical Destination Register */ - u32 __reserved_1 : 24, - logical_dest : 8; - u32 __reserved_2[3]; - } ldr; - -/*0E0*/ struct { /* Destination Format Register */ - u32 __reserved_1 : 28, - model : 4; - u32 __reserved_2[3]; - } dfr; - -/*0F0*/ struct { /* Spurious Interrupt Vector Register */ - u32 spurious_vector : 8, - apic_enabled : 1, - focus_cpu : 1, - __reserved_2 : 22; - u32 __reserved_3[3]; - } svr; - -/*100*/ struct { /* In Service Register */ -/*170*/ u32 bitfield; - u32 __reserved[3]; - } isr [8]; - -/*180*/ struct { /* Trigger Mode Register */ -/*1F0*/ u32 bitfield; - u32 __reserved[3]; - } tmr [8]; - -/*200*/ struct { /* Interrupt Request Register */ -/*270*/ u32 bitfield; - u32 __reserved[3]; - } irr [8]; - -/*280*/ union { /* Error Status Register */ - struct { - u32 send_cs_error : 1, - receive_cs_error : 1, - send_accept_error : 1, - receive_accept_error : 1, - __reserved_1 : 1, - send_illegal_vector : 1, - receive_illegal_vector : 1, - illegal_register_address : 1, - __reserved_2 : 24; - u32 __reserved_3[3]; - } error_bits; - struct { - u32 errors; - u32 __reserved_3[3]; - } all_errors; - } esr; - -/*290*/ struct { u32 __reserved[4]; } __reserved_08; - -/*2A0*/ struct { u32 __reserved[4]; } __reserved_09; - -/*2B0*/ struct { u32 __reserved[4]; } __reserved_10; - -/*2C0*/ struct { u32 __reserved[4]; } __reserved_11; - -/*2D0*/ struct { u32 __reserved[4]; } __reserved_12; - -/*2E0*/ struct { u32 __reserved[4]; } __reserved_13; - -/*2F0*/ struct { u32 __reserved[4]; } __reserved_14; - -/*300*/ struct { /* Interrupt Command Register 1 */ - u32 vector : 8, - delivery_mode : 3, - destination_mode : 1, - delivery_status : 1, - __reserved_1 : 1, - level : 1, - trigger : 1, - __reserved_2 : 2, - shorthand : 2, - __reserved_3 : 12; - u32 __reserved_4[3]; - } icr1; - -/*310*/ struct { /* Interrupt Command Register 2 */ - union { - u32 __reserved_1 : 24, - phys_dest : 4, - __reserved_2 : 4; - u32 __reserved_3 : 24, - logical_dest : 8; - } dest; - u32 __reserved_4[3]; - } icr2; - -/*320*/ struct { /* LVT - Timer */ - u32 vector : 8, - __reserved_1 : 4, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - timer_mode : 1, - __reserved_3 : 14; - u32 __reserved_4[3]; - } lvt_timer; - -/*330*/ struct { /* LVT - Thermal Sensor */ - u32 vector : 8, - 
delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - __reserved_3 : 15; - u32 __reserved_4[3]; - } lvt_thermal; - -/*340*/ struct { /* LVT - Performance Counter */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - __reserved_3 : 15; - u32 __reserved_4[3]; - } lvt_pc; - -/*350*/ struct { /* LVT - LINT0 */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - polarity : 1, - remote_irr : 1, - trigger : 1, - mask : 1, - __reserved_2 : 15; - u32 __reserved_3[3]; - } lvt_lint0; - -/*360*/ struct { /* LVT - LINT1 */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - polarity : 1, - remote_irr : 1, - trigger : 1, - mask : 1, - __reserved_2 : 15; - u32 __reserved_3[3]; - } lvt_lint1; - -/*370*/ struct { /* LVT - Error */ - u32 vector : 8, - __reserved_1 : 4, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - __reserved_3 : 15; - u32 __reserved_4[3]; - } lvt_error; - -/*380*/ struct { /* Timer Initial Count Register */ - u32 initial_count; - u32 __reserved_2[3]; - } timer_icr; - -/*390*/ const - struct { /* Timer Current Count Register */ - u32 curr_count; - u32 __reserved_2[3]; - } timer_ccr; - -/*3A0*/ struct { u32 __reserved[4]; } __reserved_16; - -/*3B0*/ struct { u32 __reserved[4]; } __reserved_17; - -/*3C0*/ struct { u32 __reserved[4]; } __reserved_18; - -/*3D0*/ struct { u32 __reserved[4]; } __reserved_19; - -/*3E0*/ struct { /* Timer Divide Configuration Register */ - u32 divisor : 4, - __reserved_1 : 28; - u32 __reserved_2[3]; - } timer_dcr; - -/*3F0*/ struct { u32 __reserved[4]; } __reserved_20; - -} __attribute__ ((packed)); - -#undef u32 - #ifdef CONFIG_X86_32 #define BAD_APICID 0xFFu #else #define BAD_APICID 0xFFFFu #endif -enum ioapic_irq_destination_types { - dest_Fixed = 0, - dest_LowestPrio = 1, - dest_SMI = 2, - dest__reserved_1 = 3, - dest_NMI = 4, - dest_INIT = 5, - dest__reserved_2 = 6, - dest_ExtINT = 7 -}; - #endif /* _ASM_X86_APICDEF_H */ diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index fc0693569f7a..b5982b94bdba 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h @@ -12,15 +12,14 @@ #define REG_OUT "a" #endif -#define __HAVE_ARCH_SW_HWEIGHT - static __always_inline unsigned int __arch_hweight32(unsigned int w) { unsigned int res; - asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + asm_inline (ALTERNATIVE("call __sw_hweight32", + "popcntl %[val], %[cnt]", X86_FEATURE_POPCNT) + : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT + : [val] REG_IN (w)); return res; } @@ -46,9 +45,10 @@ static __always_inline unsigned long __arch_hweight64(__u64 w) { unsigned long res; - asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + asm_inline (ALTERNATIVE("call __sw_hweight64", + "popcntq %[val], %[cnt]", X86_FEATURE_POPCNT) + : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT + : [val] REG_IN (w)); return res; } diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 3ac991d81e74..4c305305871b 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * This file is part of the Linux kernel. 
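On the __arch_hweight32() alternative above: __sw_hweight32 is the out-of-line fallback that gets patched over with popcntl on X86_FEATURE_POPCNT CPUs. It computes an ordinary population count; a portable, functionally equivalent sketch (not the kernel's actual implementation):

static unsigned int sw_hweight32(unsigned int w)
{
        unsigned int res = 0;

        while (w) {
                w &= w - 1;     /* clear the lowest set bit */
                res++;
        }
        return res;
}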
* * Copyright (c) 2011-2014, Intel Corporation * Authors: Fenghua Yu <fenghua.yu@intel.com>, * H. Peter Anvin <hpa@linux.intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * */ #ifndef ASM_X86_ARCHRANDOM_H @@ -28,101 +15,46 @@ #define RDRAND_RETRY_LOOPS 10 -#define RDRAND_INT ".byte 0x0f,0xc7,0xf0" -#define RDSEED_INT ".byte 0x0f,0xc7,0xf8" -#ifdef CONFIG_X86_64 -# define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0" -# define RDSEED_LONG ".byte 0x48,0x0f,0xc7,0xf8" -#else -# define RDRAND_LONG RDRAND_INT -# define RDSEED_LONG RDSEED_INT -#endif - /* Unconditional execution of RDRAND and RDSEED */ -static inline bool rdrand_long(unsigned long *v) -{ - bool ok; - unsigned int retry = RDRAND_RETRY_LOOPS; - do { - asm volatile(RDRAND_LONG - CC_SET(c) - : CC_OUT(c) (ok), "=a" (*v)); - if (ok) - return true; - } while (--retry); - return false; -} - -static inline bool rdrand_int(unsigned int *v) +static inline bool __must_check rdrand_long(unsigned long *v) { bool ok; unsigned int retry = RDRAND_RETRY_LOOPS; do { - asm volatile(RDRAND_INT - CC_SET(c) - : CC_OUT(c) (ok), "=a" (*v)); + asm volatile("rdrand %[out]" + : "=@ccc" (ok), [out] "=r" (*v)); if (ok) return true; } while (--retry); return false; } -static inline bool rdseed_long(unsigned long *v) +static inline bool __must_check rdseed_long(unsigned long *v) { bool ok; - asm volatile(RDSEED_LONG - CC_SET(c) - : CC_OUT(c) (ok), "=a" (*v)); + asm volatile("rdseed %[out]" + : "=@ccc" (ok), [out] "=r" (*v)); return ok; } -static inline bool rdseed_int(unsigned int *v) -{ - bool ok; - asm volatile(RDSEED_INT - CC_SET(c) - : CC_OUT(c) (ok), "=a" (*v)); - return ok; -} - -/* Conditional execution based on CPU type */ -#define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND) -#define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED) - /* * These are the generic interfaces; they must not be declared if the - * stubs in <linux/random.h> are to be invoked, - * i.e. CONFIG_ARCH_RANDOM is not defined. + * stubs in <linux/random.h> are to be invoked. */ -#ifdef CONFIG_ARCH_RANDOM - -static inline bool arch_get_random_long(unsigned long *v) -{ - return arch_has_random() ? rdrand_long(v) : false; -} - -static inline bool arch_get_random_int(unsigned int *v) -{ - return arch_has_random() ? rdrand_int(v) : false; -} -static inline bool arch_get_random_seed_long(unsigned long *v) +static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs) { - return arch_has_random_seed() ? rdseed_long(v) : false; + return max_longs && static_cpu_has(X86_FEATURE_RDRAND) && rdrand_long(v) ? 1 : 0; } -static inline bool arch_get_random_seed_int(unsigned int *v) +static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs) { - return arch_has_random_seed() ? rdseed_int(v) : false; + return max_longs && static_cpu_has(X86_FEATURE_RDSEED) && rdseed_long(v) ? 
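On RDRAND_RETRY_LOOPS above: RDRAND signals "no data available" by clearing CF, and Intel's DRNG guidance bounds retries at 10. A userspace sketch of the same policy using the compiler intrinsic (which returns 1 on success, i.e. CF set):

#include <stdbool.h>
#include <immintrin.h>

#define RDRAND_RETRY_LOOPS 10

static bool rdrand_u64(unsigned long long *v)
{
        for (int i = 0; i < RDRAND_RETRY_LOOPS; i++) {
                if (_rdrand64_step(v))  /* CF set: *v holds a random value */
                        return true;
        }
        return false;   /* DRNG temporarily exhausted */
}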
1 : 0; } -extern void x86_init_rdrand(struct cpuinfo_x86 *c); - -#else /* !CONFIG_ARCH_RANDOM */ - -static inline void x86_init_rdrand(struct cpuinfo_x86 *c) { } - -#endif /* !CONFIG_ARCH_RANDOM */ +#ifndef CONFIG_UML +void x86_init_rdrand(struct cpuinfo_x86 *c); +#endif #endif /* ASM_X86_ARCHRANDOM_H */ diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 1908214b9125..11c6fecc3ad7 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -1,41 +1,25 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <asm/ftrace.h> #include <linux/uaccess.h> +#include <linux/pgtable.h> #include <asm/string.h> #include <asm/page.h> #include <asm/checksum.h> +#include <asm/mce.h> #include <asm-generic/asm-prototypes.h> -#include <asm/page.h> -#include <asm/pgtable.h> #include <asm/special_insns.h> #include <asm/preempt.h> #include <asm/asm.h> +#include <asm/fred.h> +#include <asm/gsseg.h> +#include <asm/nospec-branch.h> -#ifndef CONFIG_X86_CMPXCHG64 +#ifndef CONFIG_X86_CX8 extern void cmpxchg8b_emu(void); #endif -#ifdef CONFIG_RETPOLINE -#ifdef CONFIG_X86_32 -#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void); -#else -#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void); -INDIRECT_THUNK(8) -INDIRECT_THUNK(9) -INDIRECT_THUNK(10) -INDIRECT_THUNK(11) -INDIRECT_THUNK(12) -INDIRECT_THUNK(13) -INDIRECT_THUNK(14) -INDIRECT_THUNK(15) +#ifdef CONFIG_STACKPROTECTOR +extern unsigned long __ref_stack_chk_guard; #endif -INDIRECT_THUNK(ax) -INDIRECT_THUNK(bx) -INDIRECT_THUNK(cx) -INDIRECT_THUNK(dx) -INDIRECT_THUNK(si) -INDIRECT_THUNK(di) -INDIRECT_THUNK(bp) -#endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 6467757bb39f..0e8c611bc9e2 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -2,24 +2,31 @@ #ifndef _ASM_X86_ASM_H #define _ASM_X86_ASM_H -#ifdef __ASSEMBLY__ -# define __ASM_FORM(x) x -# define __ASM_FORM_RAW(x) x -# define __ASM_FORM_COMMA(x) x, +#include <linux/annotate.h> + +#ifdef __ASSEMBLER__ +# define __ASM_FORM(x, ...) x,## __VA_ARGS__ +# define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__ +# define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__, +# define __ASM_REGPFX % #else -# define __ASM_FORM(x) " " #x " " -# define __ASM_FORM_RAW(x) #x -# define __ASM_FORM_COMMA(x) " " #x "," +#include <linux/stringify.h> +# define __ASM_FORM(x, ...) " " __stringify(x,##__VA_ARGS__) " " +# define __ASM_FORM_RAW(x, ...) __stringify(x,##__VA_ARGS__) +# define __ASM_FORM_COMMA(x, ...) " " __stringify(x,##__VA_ARGS__) "," +# define __ASM_REGPFX %% #endif +#define _ASM_BYTES(x, ...) __ASM_FORM(.byte x,##__VA_ARGS__ ;) + #ifndef __x86_64__ /* 32 bit */ -# define __ASM_SEL(a,b) __ASM_FORM(a) -# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) +# define __ASM_SEL(a,b) __ASM_FORM(a) +# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) #else /* 64 bit */ -# define __ASM_SEL(a,b) __ASM_FORM(b) -# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) +# define __ASM_SEL(a,b) __ASM_FORM(b) +# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) #endif #define __ASM_SIZE(inst, ...) 
__ASM_SEL(inst##l##__VA_ARGS__, \ @@ -46,6 +53,9 @@ #define _ASM_SI __ASM_REG(si) #define _ASM_DI __ASM_REG(di) +/* Adds a (%rip) suffix on 64 bits only; for immediate memory references */ +#define _ASM_RIP(x) __ASM_SEL_RAW(x, x (__ASM_REGPFX rip)) + #ifndef __x86_64__ /* 32 bit */ @@ -105,102 +115,106 @@ #endif -/* - * Macros to generate condition code outputs from inline assembly, - * The output operand must be type "bool". - */ -#ifdef __GCC_ASM_FLAG_OUTPUTS__ -# define CC_SET(c) "\n\t/* output condition code " #c "*/\n" -# define CC_OUT(c) "=@cc" #c -#else -# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n" -# define CC_OUT(c) [_cc_ ## c] "=qm" -#endif - -/* Exception table entry */ -#ifdef __ASSEMBLY__ -# define _ASM_EXTABLE_HANDLE(from, to, handler) \ - .pushsection "__ex_table","a" ; \ - .balign 4 ; \ - .long (from) - . ; \ - .long (to) - . ; \ - .long (handler) - . ; \ - .popsection +#ifndef __ASSEMBLER__ +static __always_inline __pure void *rip_rel_ptr(void *p) +{ + asm("leaq %c1(%%rip), %0" : "=r"(p) : "i"(p)); -# define _ASM_EXTABLE(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) + return p; +} +#endif -# define _ASM_EXTABLE_UA(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess) +#ifdef __KERNEL__ -# define _ASM_EXTABLE_FAULT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) +#ifndef COMPILE_OFFSETS +#include <asm/asm-offsets.h> +#endif -# define _ASM_EXTABLE_EX(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) +# include <asm/extable_fixup_types.h> -# define _ASM_EXTABLE_REFCOUNT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) +/* Exception table entry */ +#ifdef __ASSEMBLER__ + +# define _ASM_EXTABLE_TYPE(from, to, type) \ + .pushsection "__ex_table", "aM", @progbits, EXTABLE_SIZE ; \ + .balign 4 ; \ + .long (from) - . ; \ + .long (to) - . ; \ + .long type ; \ + .popsection -# define _ASM_NOKPROBE(entry) \ +# ifdef CONFIG_KPROBES +# define _ASM_NOKPROBE(entry) \ .pushsection "_kprobe_blacklist","aw" ; \ _ASM_ALIGN ; \ _ASM_PTR (entry); \ .popsection - -.macro ALIGN_DESTINATION - /* check for bad alignment of destination */ - movl %edi,%ecx - andl $7,%ecx - jz 102f /* already aligned */ - subl $8,%ecx - negl %ecx - subl %ecx,%edx -100: movb (%rsi),%al -101: movb %al,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz 100b -102: - .section .fixup,"ax" -103: addl %ecx,%edx /* ecx is zerorest also */ - jmp copy_user_handle_tail - .previous - - _ASM_EXTABLE_UA(100b, 103b) - _ASM_EXTABLE_UA(101b, 103b) - .endm - -#else -# define _EXPAND_EXTABLE_HANDLE(x) #x -# define _ASM_EXTABLE_HANDLE(from, to, handler) \ - " .pushsection \"__ex_table\",\"a\"\n" \ +# else +# define _ASM_NOKPROBE(entry) +# endif + +#else /* ! 
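Why "(from) - ." and "(to) - ." in the entries above: storing PC-relative 32-bit offsets keeps __ex_table position-independent and smaller than absolute 64-bit pointers would be. At fault time the absolute addresses are recovered roughly the way the kernel's extable code does it:

struct exception_table_entry {
        int insn, fixup, data;  /* offsets relative to each field's address */
};

static unsigned long ex_insn_addr(const struct exception_table_entry *x)
{
        return (unsigned long)&x->insn + x->insn;
}

static unsigned long ex_fixup_addr(const struct exception_table_entry *x)
{
        return (unsigned long)&x->fixup + x->fixup;
}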
__ASSEMBLER__ */ + +# define DEFINE_EXTABLE_TYPE_REG \ + ".macro extable_type_reg type:req reg:req\n" \ + ".set .Lfound, 0\n" \ + ".set .Lregnr, 0\n" \ + ".irp rs,rax,rcx,rdx,rbx,rsp,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15\n" \ + ".ifc \\reg, %%\\rs\n" \ + ".set .Lfound, .Lfound+1\n" \ + ".long \\type + (.Lregnr << 8)\n" \ + ".endif\n" \ + ".set .Lregnr, .Lregnr+1\n" \ + ".endr\n" \ + ".set .Lregnr, 0\n" \ + ".irp rs,eax,ecx,edx,ebx,esp,ebp,esi,edi,r8d,r9d,r10d,r11d,r12d,r13d,r14d,r15d\n" \ + ".ifc \\reg, %%\\rs\n" \ + ".set .Lfound, .Lfound+1\n" \ + ".long \\type + (.Lregnr << 8)\n" \ + ".endif\n" \ + ".set .Lregnr, .Lregnr+1\n" \ + ".endr\n" \ + ".if (.Lfound != 1)\n" \ + ".error \"extable_type_reg: bad register argument\"\n" \ + ".endif\n" \ + ".endm\n" + +# define UNDEFINE_EXTABLE_TYPE_REG \ + ".purgem extable_type_reg\n" + +# define _ASM_EXTABLE_TYPE(from, to, type) \ + " .pushsection __ex_table, \"aM\", @progbits, " \ + __stringify(EXTABLE_SIZE) "\n" \ " .balign 4\n" \ " .long (" #from ") - .\n" \ " .long (" #to ") - .\n" \ - " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \ + " .long " __stringify(type) " \n" \ " .popsection\n" -# define _ASM_EXTABLE(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) - -# define _ASM_EXTABLE_UA(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess) - -# define _ASM_EXTABLE_FAULT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) +# define _ASM_EXTABLE_TYPE_REG(from, to, type, reg) \ + " .pushsection __ex_table, \"aM\", @progbits, " \ + __stringify(EXTABLE_SIZE) "\n" \ + " .balign 4\n" \ + " .long (" #from ") - .\n" \ + " .long (" #to ") - .\n" \ + DEFINE_EXTABLE_TYPE_REG \ + "extable_type_reg reg=" __stringify(reg) ", type=" __stringify(type) " \n"\ + UNDEFINE_EXTABLE_TYPE_REG \ + " .popsection\n" -# define _ASM_EXTABLE_EX(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) +/* For C file, we already have NOKPROBE_SYMBOL macro */ -# define _ASM_EXTABLE_REFCOUNT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) +/* Insert a comma if args are non-empty */ +#define COMMA(x...) __COMMA(x) +#define __COMMA(...) , ##__VA_ARGS__ -/* For C file, we already have NOKPROBE_SYMBOL macro */ -#endif +/* + * Combine multiple asm inline constraint args into a single arg for passing to + * another macro. + */ +#define ASM_OUTPUT(x...) x +#define ASM_INPUT(x...) x -#ifndef __ASSEMBLY__ /* * This output constraint should be used for any inline asm which has a "call" * instruction. Otherwise the asm may be inserted before the frame pointer @@ -209,6 +223,35 @@ */ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) +#endif /* __ASSEMBLER__ */ + +#define _ASM_EXTABLE(from, to) \ + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_DEFAULT) + +#define _ASM_EXTABLE_UA(from, to) \ + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_UACCESS) + +#define _ASM_EXTABLE_FAULT(from, to) \ + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_FAULT) + +/* + * Both i386 and x86_64 returns 64-bit values in edx:eax for certain + * instructions, but GCC's "A" constraint has different meanings. + * For i386, "A" means exactly edx:eax, while for x86_64 it + * means rax *or* rdx. 
+ * + * These helpers wrapping these semantic differences save one instruction + * clearing the high half of 'low': + */ +#ifdef CONFIG_X86_64 +# define EAX_EDX_DECLARE_ARGS(val, low, high) unsigned long low, high +# define EAX_EDX_VAL(val, low, high) ((low) | (high) << 32) +# define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high) +#else +# define EAX_EDX_DECLARE_ARGS(val, low, high) u64 val +# define EAX_EDX_VAL(val, low, high) (val) +# define EAX_EDX_RET(val, low, high) "=A" (val) #endif +#endif /* __KERNEL__ */ #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index ea3d95275b43..75743f1dfd4e 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -14,206 +14,115 @@ * resource counting etc.. */ -#define ATOMIC_INIT(i) { (i) } - -/** - * arch_atomic_read - read atomic variable - * @v: pointer of type atomic_t - * - * Atomically reads the value of @v. - */ static __always_inline int arch_atomic_read(const atomic_t *v) { /* * Note for KASAN: we deliberately don't use READ_ONCE_NOCHECK() here, * it's non-inlined function that increases binary size and stack usage. */ - return READ_ONCE((v)->counter); + return __READ_ONCE((v)->counter); } -/** - * arch_atomic_set - set atomic variable - * @v: pointer of type atomic_t - * @i: required value - * - * Atomically sets the value of @v to @i. - */ static __always_inline void arch_atomic_set(atomic_t *v, int i) { - WRITE_ONCE(v->counter, i); + __WRITE_ONCE(v->counter, i); } -/** - * arch_atomic_add - add integer to atomic variable - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v. - */ static __always_inline void arch_atomic_add(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "addl %1,%0" + asm_inline volatile(LOCK_PREFIX "addl %1, %0" : "+m" (v->counter) - : "ir" (i)); + : "ir" (i) : "memory"); } -/** - * arch_atomic_sub - subtract integer from atomic variable - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v. - */ static __always_inline void arch_atomic_sub(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "subl %1,%0" + asm_inline volatile(LOCK_PREFIX "subl %1, %0" : "+m" (v->counter) - : "ir" (i)); + : "ir" (i) : "memory"); } -/** - * arch_atomic_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. - */ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, e, "er", i); } #define arch_atomic_sub_and_test arch_atomic_sub_and_test -/** - * arch_atomic_inc - increment atomic variable - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1. - */ static __always_inline void arch_atomic_inc(atomic_t *v) { - asm volatile(LOCK_PREFIX "incl %0" - : "+m" (v->counter)); + asm_inline volatile(LOCK_PREFIX "incl %0" + : "+m" (v->counter) :: "memory"); } #define arch_atomic_inc arch_atomic_inc -/** - * arch_atomic_dec - decrement atomic variable - * @v: pointer of type atomic_t - * - * Atomically decrements @v by 1. 
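A usage sketch for the EAX_EDX_* helpers defined above (an assumption that mirrors how the kernel's rdmsr-style wrappers consume them; the function name is illustrative): on 64-bit, 'low' and 'high' are two 32-bit outputs recombined by EAX_EDX_VAL(), saving the instruction that would otherwise clear the upper half of 'low'; on 32-bit the "A" constraint binds edx:eax directly:

static inline unsigned long long read_msr_sketch(unsigned int msr)
{
        EAX_EDX_DECLARE_ARGS(val, low, high);

        asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
        return EAX_EDX_VAL(val, low, high);
}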
- */ static __always_inline void arch_atomic_dec(atomic_t *v) { - asm volatile(LOCK_PREFIX "decl %0" - : "+m" (v->counter)); + asm_inline volatile(LOCK_PREFIX "decl %0" + : "+m" (v->counter) :: "memory"); } #define arch_atomic_dec arch_atomic_dec -/** - * arch_atomic_dec_and_test - decrement and test - * @v: pointer of type atomic_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, e); } #define arch_atomic_dec_and_test arch_atomic_dec_and_test -/** - * arch_atomic_inc_and_test - increment and test - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, e); } #define arch_atomic_inc_and_test arch_atomic_inc_and_test -/** - * arch_atomic_add_negative - add and test if negative - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, s, "er", i); } #define arch_atomic_add_negative arch_atomic_add_negative -/** - * arch_atomic_add_return - add integer and return - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v and returns @i + @v - */ static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { return i + xadd(&v->counter, i); } +#define arch_atomic_add_return arch_atomic_add_return -/** - * arch_atomic_sub_return - subtract integer and return - * @v: pointer of type atomic_t - * @i: integer value to subtract - * - * Atomically subtracts @i from @v and returns @v - @i - */ -static __always_inline int arch_atomic_sub_return(int i, atomic_t *v) -{ - return arch_atomic_add_return(-i, v); -} +#define arch_atomic_sub_return(i, v) arch_atomic_add_return(-(i), v) static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) { return xadd(&v->counter, i); } +#define arch_atomic_fetch_add arch_atomic_fetch_add -static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v) -{ - return xadd(&v->counter, -i); -} +#define arch_atomic_fetch_sub(i, v) arch_atomic_fetch_add(-(i), v) static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { return arch_cmpxchg(&v->counter, old, new); } +#define arch_atomic_cmpxchg arch_atomic_cmpxchg -#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new) { - return try_cmpxchg(&v->counter, old, new); + return arch_try_cmpxchg(&v->counter, old, new); } +#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg -static inline int arch_atomic_xchg(atomic_t *v, int new) +static __always_inline int arch_atomic_xchg(atomic_t *v, int new) { return arch_xchg(&v->counter, new); } +#define arch_atomic_xchg arch_atomic_xchg -static inline void arch_atomic_and(int i, atomic_t *v) +static __always_inline void arch_atomic_and(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "andl %1,%0" + asm_inline volatile(LOCK_PREFIX "andl %1, %0" : "+m" (v->counter) : "ir" (i) : "memory"); } -static inline int arch_atomic_fetch_and(int i, atomic_t 
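On the xadd-based arch_atomic_add_return() above: XADD atomically stores old + i and returns the old value, so the new value is reconstructed as i + old without touching memory again. The same shape in portable C11, for illustration only:

#include <stdatomic.h>

static int add_return(atomic_int *v, int i)
{
        /* atomic_fetch_add() is the xadd analogue: it returns the old value */
        return i + atomic_fetch_add(v, i);
}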
*v) +static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v) { int val = arch_atomic_read(v); @@ -221,16 +130,17 @@ static inline int arch_atomic_fetch_and(int i, atomic_t *v) return val; } +#define arch_atomic_fetch_and arch_atomic_fetch_and -static inline void arch_atomic_or(int i, atomic_t *v) +static __always_inline void arch_atomic_or(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "orl %1,%0" + asm_inline volatile(LOCK_PREFIX "orl %1, %0" : "+m" (v->counter) : "ir" (i) : "memory"); } -static inline int arch_atomic_fetch_or(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_or(int i, atomic_t *v) { int val = arch_atomic_read(v); @@ -238,16 +148,17 @@ static inline int arch_atomic_fetch_or(int i, atomic_t *v) return val; } +#define arch_atomic_fetch_or arch_atomic_fetch_or -static inline void arch_atomic_xor(int i, atomic_t *v) +static __always_inline void arch_atomic_xor(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "xorl %1,%0" + asm_inline volatile(LOCK_PREFIX "xorl %1, %0" : "+m" (v->counter) : "ir" (i) : "memory"); } -static inline int arch_atomic_fetch_xor(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_xor(int i, atomic_t *v) { int val = arch_atomic_read(v); @@ -255,6 +166,7 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v) return val; } +#define arch_atomic_fetch_xor arch_atomic_fetch_xor #ifdef CONFIG_X86_32 # include <asm/atomic64_32.h> @@ -262,6 +174,4 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v) # include <asm/atomic64_64.h> #endif -#include <asm-generic/atomic-instrumented.h> - #endif /* _ASM_X86_ATOMIC_H */ diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 6a5b0ec460da..ab838205c1c6 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -9,11 +9,37 @@ /* An 64bit atomic type */ typedef struct { - u64 __aligned(8) counter; + s64 __aligned(8) counter; } atomic64_t; #define ATOMIC64_INIT(val) { (val) } +/* + * Read an atomic64_t non-atomically. + * + * This is intended to be used in cases where a subsequent atomic operation + * will handle the torn value, and can be used to prime the first iteration + * of unconditional try_cmpxchg() loops, e.g.: + * + * s64 val = arch_atomic64_read_nonatomic(v); + * do { } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i); + * + * This is NOT safe to use where the value is not always checked by a + * subsequent atomic operation, such as in conditional try_cmpxchg() loops + * that can break before the atomic operation, e.g.: + * + * s64 val = arch_atomic64_read_nonatomic(v); + * do { + * if (condition(val)) + * break; + * } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i); + */ +static __always_inline s64 arch_atomic64_read_nonatomic(const atomic64_t *v) +{ + /* See comment in arch_atomic_read(). */ + return __READ_ONCE(v->counter); +} + #define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...) #ifndef ATOMIC64_EXPORT #define ATOMIC64_DECL_ONE __ATOMIC64_DECL @@ -22,16 +48,20 @@ typedef struct { ATOMIC64_EXPORT(atomic64_##sym) #endif -#ifdef CONFIG_X86_CMPXCHG64 -#define __alternative_atomic64(f, g, out, in...) \ - asm volatile("call %P[func]" \ - : out : [func] "i" (atomic64_##g##_cx8), ## in) +#ifdef CONFIG_X86_CX8 +#define __alternative_atomic64(f, g, out, in, clobbers...) 
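The arch_atomic64_read_nonatomic() comment above deserves a concrete rendering. In C11 terms, the pattern primes the loop with a relaxed (possibly torn) read and lets compare-exchange both detect the mismatch and refresh the expected value, so a torn prime costs at most one extra iteration. A sketch, not the kernel code:

#include <stdatomic.h>
#include <stdint.h>

static int64_t fetch_and_sketch(_Atomic int64_t *v, int64_t i)
{
        int64_t val = atomic_load_explicit(v, memory_order_relaxed);

        /* on failure, 'val' is reloaded from *v and the loop retries */
        while (!atomic_compare_exchange_weak(v, &val, val & i))
                ;
        return val;
}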
\ + asm volatile("call %c[func]" \ + : ALT_OUTPUT_SP(out) \ + : [func] "i" (atomic64_##g##_cx8) \ + COMMA(in) \ + : clobbers) #define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8) #else -#define __alternative_atomic64(f, g, out, in...) \ - alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \ - X86_FEATURE_CX8, ASM_OUTPUT2(out), ## in) +#define __alternative_atomic64(f, g, out, in, clobbers...) \ + alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \ + X86_FEATURE_CX8, ASM_OUTPUT(out), \ + ASM_INPUT(in), clobbers) #define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8); \ ATOMIC64_DECL_ONE(sym##_386) @@ -42,8 +72,8 @@ ATOMIC64_DECL_ONE(inc_386); ATOMIC64_DECL_ONE(dec_386); #endif -#define alternative_atomic64(f, out, in...) \ - __alternative_atomic64(f, f, ASM_OUTPUT2(out), ## in) +#define alternative_atomic64(f, out, in, clobbers...) \ + __alternative_atomic64(f, f, ASM_OUTPUT(out), ASM_INPUT(in), clobbers) ATOMIC64_DECL(read); ATOMIC64_DECL(set); @@ -61,204 +91,154 @@ ATOMIC64_DECL(add_unless); #undef __ATOMIC64_DECL #undef ATOMIC64_EXPORT -/** - * arch_atomic64_cmpxchg - cmpxchg atomic64 variable - * @v: pointer to type atomic64_t - * @o: expected value - * @n: new value - * - * Atomically sets @v to @n if it was equal to @o and returns - * the old value. - */ +static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) +{ + return arch_cmpxchg64(&v->counter, old, new); +} +#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg -static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o, - long long n) +static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { - return arch_cmpxchg64(&v->counter, o, n); + return arch_try_cmpxchg64(&v->counter, old, new); } +#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg -/** - * arch_atomic64_xchg - xchg atomic64 variable - * @v: pointer to type atomic64_t - * @n: value to assign - * - * Atomically xchgs the value of @v to @n and returns - * the old value. - */ -static inline long long arch_atomic64_xchg(atomic64_t *v, long long n) +static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) { - long long o; + s64 o; unsigned high = (unsigned)(n >> 32); unsigned low = (unsigned)n; - alternative_atomic64(xchg, "=&A" (o), - "S" (v), "b" (low), "c" (high) - : "memory"); + alternative_atomic64(xchg, + "=&A" (o), + ASM_INPUT("S" (v), "b" (low), "c" (high)), + "memory"); return o; } +#define arch_atomic64_xchg arch_atomic64_xchg -/** - * arch_atomic64_set - set atomic64 variable - * @v: pointer to type atomic64_t - * @i: value to assign - * - * Atomically sets the value of @v to @n. - */ -static inline void arch_atomic64_set(atomic64_t *v, long long i) +static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { unsigned high = (unsigned)(i >> 32); unsigned low = (unsigned)i; - alternative_atomic64(set, /* no output */, - "S" (v), "b" (low), "c" (high) - : "eax", "edx", "memory"); + alternative_atomic64(set, + /* no output */, + ASM_INPUT("S" (v), "b" (low), "c" (high)), + "eax", "edx", "memory"); } -/** - * arch_atomic64_read - read atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically reads the value of @v and returns it. 
- */ -static inline long long arch_atomic64_read(const atomic64_t *v) +static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { - long long r; - alternative_atomic64(read, "=&A" (r), "c" (v) : "memory"); + s64 r; + alternative_atomic64(read, "=&A" (r), "c" (v), "memory"); return r; } -/** - * arch_atomic64_add_return - add and return - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v and returns @i + *@v - */ -static inline long long arch_atomic64_add_return(long long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { alternative_atomic64(add_return, - ASM_OUTPUT2("+A" (i), "+c" (v)), - ASM_NO_INPUT_CLOBBER("memory")); + ASM_OUTPUT("+A" (i), "+c" (v)), + /* no input */, + "memory"); return i; } +#define arch_atomic64_add_return arch_atomic64_add_return -/* - * Other variants with different arithmetic operators: - */ -static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) { alternative_atomic64(sub_return, - ASM_OUTPUT2("+A" (i), "+c" (v)), - ASM_NO_INPUT_CLOBBER("memory")); + ASM_OUTPUT("+A" (i), "+c" (v)), + /* no input */, + "memory"); return i; } +#define arch_atomic64_sub_return arch_atomic64_sub_return -static inline long long arch_atomic64_inc_return(atomic64_t *v) +static __always_inline s64 arch_atomic64_inc_return(atomic64_t *v) { - long long a; - alternative_atomic64(inc_return, "=&A" (a), - "S" (v) : "memory", "ecx"); + s64 a; + alternative_atomic64(inc_return, + "=&A" (a), + "S" (v), + "memory", "ecx"); return a; } #define arch_atomic64_inc_return arch_atomic64_inc_return -static inline long long arch_atomic64_dec_return(atomic64_t *v) +static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v) { - long long a; - alternative_atomic64(dec_return, "=&A" (a), - "S" (v) : "memory", "ecx"); + s64 a; + alternative_atomic64(dec_return, + "=&A" (a), + "S" (v), + "memory", "ecx"); return a; } #define arch_atomic64_dec_return arch_atomic64_dec_return -/** - * arch_atomic64_add - add integer to atomic64 variable - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v. - */ -static inline long long arch_atomic64_add(long long i, atomic64_t *v) +static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) { __alternative_atomic64(add, add_return, - ASM_OUTPUT2("+A" (i), "+c" (v)), - ASM_NO_INPUT_CLOBBER("memory")); - return i; + ASM_OUTPUT("+A" (i), "+c" (v)), + /* no input */, + "memory"); } -/** - * arch_atomic64_sub - subtract the atomic64 variable - * @i: integer value to subtract - * @v: pointer to type atomic64_t - * - * Atomically subtracts @i from @v. - */ -static inline long long arch_atomic64_sub(long long i, atomic64_t *v) +static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v) { __alternative_atomic64(sub, sub_return, - ASM_OUTPUT2("+A" (i), "+c" (v)), - ASM_NO_INPUT_CLOBBER("memory")); - return i; + ASM_OUTPUT("+A" (i), "+c" (v)), + /* no input */, + "memory"); } -/** - * arch_atomic64_inc - increment atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically increments @v by 1. 
- */ -static inline void arch_atomic64_inc(atomic64_t *v) +static __always_inline void arch_atomic64_inc(atomic64_t *v) { - __alternative_atomic64(inc, inc_return, /* no output */, - "S" (v) : "memory", "eax", "ecx", "edx"); + __alternative_atomic64(inc, inc_return, + /* no output */, + "S" (v), + "memory", "eax", "ecx", "edx"); } #define arch_atomic64_inc arch_atomic64_inc -/** - * arch_atomic64_dec - decrement atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically decrements @v by 1. - */ -static inline void arch_atomic64_dec(atomic64_t *v) +static __always_inline void arch_atomic64_dec(atomic64_t *v) { - __alternative_atomic64(dec, dec_return, /* no output */, - "S" (v) : "memory", "eax", "ecx", "edx"); + __alternative_atomic64(dec, dec_return, + /* no output */, + "S" (v), + "memory", "eax", "ecx", "edx"); } #define arch_atomic64_dec arch_atomic64_dec -/** - * arch_atomic64_add_unless - add unless the number is a given value - * @v: pointer of type atomic64_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, so long as it was not @u. - * Returns non-zero if the add was done, zero otherwise. - */ -static inline int arch_atomic64_add_unless(atomic64_t *v, long long a, - long long u) +static __always_inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) { unsigned low = (unsigned)u; unsigned high = (unsigned)(u >> 32); alternative_atomic64(add_unless, - ASM_OUTPUT2("+A" (a), "+c" (low), "+D" (high)), - "S" (v) : "memory"); + ASM_OUTPUT("+A" (a), "+c" (low), "+D" (high)), + "S" (v), + "memory"); return (int)a; } +#define arch_atomic64_add_unless arch_atomic64_add_unless -static inline int arch_atomic64_inc_not_zero(atomic64_t *v) +static __always_inline int arch_atomic64_inc_not_zero(atomic64_t *v) { int r; - alternative_atomic64(inc_not_zero, "=&a" (r), - "S" (v) : "ecx", "edx", "memory"); + alternative_atomic64(inc_not_zero, + "=&a" (r), + "S" (v), + "ecx", "edx", "memory"); return r; } #define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero -static inline long long arch_atomic64_dec_if_positive(atomic64_t *v) +static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) { - long long r; - alternative_atomic64(dec_if_positive, "=&A" (r), - "S" (v) : "ecx", "memory"); + s64 r; + alternative_atomic64(dec_if_positive, + "=&A" (r), + "S" (v), + "ecx", "memory"); return r; } #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive @@ -266,69 +246,66 @@ static inline long long arch_atomic64_dec_if_positive(atomic64_t *v) #undef alternative_atomic64 #undef __alternative_atomic64 -static inline void arch_atomic64_and(long long i, atomic64_t *v) +static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i)); } -static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i)); - return old; + return val; } +#define arch_atomic64_fetch_and arch_atomic64_fetch_and -static inline void arch_atomic64_or(long long i, atomic64_t *v) +static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v) { - long 
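A semantics note on arch_atomic64_dec_if_positive() above: it decrements only when the result stays non-negative and returns the would-be decremented value either way, so a negative return means the counter was left untouched. A C11 sketch of that contract, assuming the usual try_cmpxchg loop shape:

#include <stdatomic.h>
#include <stdint.h>

static int64_t dec_if_positive(_Atomic int64_t *v)
{
        int64_t val = atomic_load_explicit(v, memory_order_relaxed);
        int64_t dec;

        do {
                dec = val - 1;
                if (dec < 0)
                        break;          /* would go negative: do not store */
        } while (!atomic_compare_exchange_weak(v, &val, dec));

        return dec;
}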
long old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i)); } -static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i)); - return old; + return val; } +#define arch_atomic64_fetch_or arch_atomic64_fetch_or -static inline void arch_atomic64_xor(long long i, atomic64_t *v) +static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i)); } -static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i)); - return old; + return val; } +#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor -static inline long long arch_atomic64_fetch_add(long long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val + i)); - return old; + return val; } +#define arch_atomic64_fetch_add arch_atomic64_fetch_add #define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), (v)) diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index dadc20adba21..87b496325b5b 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -10,196 +10,113 @@ #define ATOMIC64_INIT(i) { (i) } -/** - * arch_atomic64_read - read atomic64 variable - * @v: pointer of type atomic64_t - * - * Atomically reads the value of @v. - * Doesn't imply a read memory barrier. - */ -static inline long arch_atomic64_read(const atomic64_t *v) -{ - return READ_ONCE((v)->counter); -} - -/** - * arch_atomic64_set - set atomic64 variable - * @v: pointer to type atomic64_t - * @i: required value - * - * Atomically sets the value of @v to @i. - */ -static inline void arch_atomic64_set(atomic64_t *v, long i) -{ - WRITE_ONCE(v->counter, i); -} - -/** - * arch_atomic64_add - add integer to atomic64 variable - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v. 
- */ -static __always_inline void arch_atomic64_add(long i, atomic64_t *v) -{ - asm volatile(LOCK_PREFIX "addq %1,%0" +static __always_inline s64 arch_atomic64_read(const atomic64_t *v) +{ + return __READ_ONCE((v)->counter); +} + +static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) +{ + __WRITE_ONCE(v->counter, i); +} + +static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) +{ + asm_inline volatile(LOCK_PREFIX "addq %1, %0" : "=m" (v->counter) - : "er" (i), "m" (v->counter)); + : "er" (i), "m" (v->counter) : "memory"); } -/** - * arch_atomic64_sub - subtract the atomic64 variable - * @i: integer value to subtract - * @v: pointer to type atomic64_t - * - * Atomically subtracts @i from @v. - */ -static inline void arch_atomic64_sub(long i, atomic64_t *v) +static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "subq %1,%0" + asm_inline volatile(LOCK_PREFIX "subq %1, %0" : "=m" (v->counter) - : "er" (i), "m" (v->counter)); + : "er" (i), "m" (v->counter) : "memory"); } -/** - * arch_atomic64_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer to type atomic64_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. - */ -static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v) +static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i); } #define arch_atomic64_sub_and_test arch_atomic64_sub_and_test -/** - * arch_atomic64_inc - increment atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically increments @v by 1. - */ static __always_inline void arch_atomic64_inc(atomic64_t *v) { - asm volatile(LOCK_PREFIX "incq %0" + asm_inline volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) - : "m" (v->counter)); + : "m" (v->counter) : "memory"); } #define arch_atomic64_inc arch_atomic64_inc -/** - * arch_atomic64_dec - decrement atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically decrements @v by 1. - */ static __always_inline void arch_atomic64_dec(atomic64_t *v) { - asm volatile(LOCK_PREFIX "decq %0" + asm_inline volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) - : "m" (v->counter)); + : "m" (v->counter) : "memory"); } #define arch_atomic64_dec arch_atomic64_dec -/** - * arch_atomic64_dec_and_test - decrement and test - * @v: pointer to type atomic64_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ -static inline bool arch_atomic64_dec_and_test(atomic64_t *v) +static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, e); } #define arch_atomic64_dec_and_test arch_atomic64_dec_and_test -/** - * arch_atomic64_inc_and_test - increment and test - * @v: pointer to type atomic64_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. 
- */ -static inline bool arch_atomic64_inc_and_test(atomic64_t *v) +static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, e); } #define arch_atomic64_inc_and_test arch_atomic64_inc_and_test -/** - * arch_atomic64_add_negative - add and test if negative - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ -static inline bool arch_atomic64_add_negative(long i, atomic64_t *v) +static __always_inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i); } #define arch_atomic64_add_negative arch_atomic64_add_negative -/** - * arch_atomic64_add_return - add and return - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v and returns @i + @v - */ -static __always_inline long arch_atomic64_add_return(long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { return i + xadd(&v->counter, i); } +#define arch_atomic64_add_return arch_atomic64_add_return -static inline long arch_atomic64_sub_return(long i, atomic64_t *v) -{ - return arch_atomic64_add_return(-i, v); -} +#define arch_atomic64_sub_return(i, v) arch_atomic64_add_return(-(i), v) -static inline long arch_atomic64_fetch_add(long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { return xadd(&v->counter, i); } +#define arch_atomic64_fetch_add arch_atomic64_fetch_add -static inline long arch_atomic64_fetch_sub(long i, atomic64_t *v) -{ - return xadd(&v->counter, -i); -} +#define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), v) -static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new) +static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { return arch_cmpxchg(&v->counter, old, new); } +#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg -#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg -static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new) +static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { - return try_cmpxchg(&v->counter, old, new); + return arch_try_cmpxchg(&v->counter, old, new); } +#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg -static inline long arch_atomic64_xchg(atomic64_t *v, long new) +static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new) { return arch_xchg(&v->counter, new); } +#define arch_atomic64_xchg arch_atomic64_xchg -static inline void arch_atomic64_and(long i, atomic64_t *v) +static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "andq %1,%0" + asm_inline volatile(LOCK_PREFIX "andq %1, %0" : "+m" (v->counter) : "er" (i) : "memory"); } -static inline long arch_atomic64_fetch_and(long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); @@ -207,16 +124,17 @@ static inline long arch_atomic64_fetch_and(long i, atomic64_t *v) } while (!arch_atomic64_try_cmpxchg(v, &val, val & i)); return val; } +#define arch_atomic64_fetch_and arch_atomic64_fetch_and -static inline void arch_atomic64_or(long i, atomic64_t *v) +static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "orq %1,%0" + 
asm_inline volatile(LOCK_PREFIX "orq %1, %0" : "+m" (v->counter) : "er" (i) : "memory"); } -static inline long arch_atomic64_fetch_or(long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); @@ -224,16 +142,17 @@ static inline long arch_atomic64_fetch_or(long i, atomic64_t *v) } while (!arch_atomic64_try_cmpxchg(v, &val, val | i)); return val; } +#define arch_atomic64_fetch_or arch_atomic64_fetch_or -static inline void arch_atomic64_xor(long i, atomic64_t *v) +static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "xorq %1,%0" + asm_inline volatile(LOCK_PREFIX "xorq %1, %0" : "+m" (v->counter) : "er" (i) : "memory"); } -static inline long arch_atomic64_fetch_xor(long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); @@ -241,5 +160,6 @@ static inline long arch_atomic64_fetch_xor(long i, atomic64_t *v) } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i)); return val; } +#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor #endif /* _ASM_X86_ATOMIC64_64_H */ diff --git a/arch/x86/include/asm/audit.h b/arch/x86/include/asm/audit.h new file mode 100644 index 000000000000..fa918f01333e --- /dev/null +++ b/arch/x86/include/asm/audit.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AUDIT_H +#define _ASM_X86_AUDIT_H + +int ia32_classify_syscall(unsigned int syscall); + +extern unsigned ia32_dir_class[]; +extern unsigned ia32_write_class[]; +extern unsigned ia32_read_class[]; +extern unsigned ia32_chattr_class[]; +extern unsigned ia32_signal_class[]; + + +#endif /* _ASM_X86_AUDIT_H */ diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 14de0432d288..db70832232d4 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -12,16 +12,16 @@ */ #ifdef CONFIG_X86_32 -#define mb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "mfence", \ +#define mb() asm volatile(ALTERNATIVE("lock addl $0,-4(%%esp)", "mfence", \ X86_FEATURE_XMM2) ::: "memory", "cc") -#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "lfence", \ +#define rmb() asm volatile(ALTERNATIVE("lock addl $0,-4(%%esp)", "lfence", \ X86_FEATURE_XMM2) ::: "memory", "cc") -#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "sfence", \ +#define wmb() asm volatile(ALTERNATIVE("lock addl $0,-4(%%esp)", "sfence", \ X86_FEATURE_XMM2) ::: "memory", "cc") #else -#define mb() asm volatile("mfence":::"memory") -#define rmb() asm volatile("lfence":::"memory") -#define wmb() asm volatile("sfence" ::: "memory") +#define __mb() asm volatile("mfence":::"memory") +#define __rmb() asm volatile("lfence":::"memory") +#define __wmb() asm volatile("sfence" ::: "memory") #endif /** @@ -33,33 +33,25 @@ * Returns: * 0 - (index < size) */ -static inline unsigned long array_index_mask_nospec(unsigned long index, - unsigned long size) -{ - unsigned long mask; - - asm volatile ("cmp %1,%2; sbb %0,%0;" - :"=r" (mask) - :"g"(size),"r" (index) - :"cc"); - return mask; -} - -/* Override the default implementation from linux/nospec.h. 
*/ -#define array_index_mask_nospec array_index_mask_nospec +#define array_index_mask_nospec(idx,sz) ({ \ + typeof((idx)+(sz)) __idx = (idx); \ + typeof(__idx) __sz = (sz); \ + unsigned long __mask; \ + asm volatile ("cmp %1,%2; sbb %0,%0" \ + :"=r" (__mask) \ + :ASM_INPUT_G (__sz), \ + "r" (__idx) \ + :"cc"); \ + __mask; }) /* Prevent speculative execution past this barrier. */ -#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ - "lfence", X86_FEATURE_LFENCE_RDTSC) +#define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC) -#define dma_rmb() barrier() -#define dma_wmb() barrier() +#define __dma_rmb() barrier() +#define __dma_wmb() barrier() + +#define __smp_mb() asm volatile("lock addl $0,-4(%%" _ASM_SP ")" ::: "memory", "cc") -#ifdef CONFIG_X86_32 -#define __smp_mb() asm volatile("lock; addl $0,-4(%%esp)" ::: "memory", "cc") -#else -#define __smp_mb() asm volatile("lock; addl $0,-4(%%rsp)" ::: "memory", "cc") -#endif #define __smp_rmb() dma_rmb() #define __smp_wmb() barrier() #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) @@ -80,8 +72,11 @@ do { \ }) /* Atomic operations are already serializing on x86 */ -#define __smp_mb__before_atomic() barrier() -#define __smp_mb__after_atomic() barrier() +#define __smp_mb__before_atomic() do { } while (0) +#define __smp_mb__after_atomic() do { } while (0) + +/* Writing to CR3 provides a full memory barrier in switch_mm(). */ +#define smp_mb__after_switch_mm() do { } while (0) #include <asm-generic/barrier.h> diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index ad7b210aa3f6..c2ce213f2b9b 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -36,245 +36,130 @@ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). */ -#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) -/* Technically wrong, but this avoids compilation errors on some gcc - versions. */ -#define BITOP_ADDR(x) "=m" (*(volatile long *) (x)) -#else -#define BITOP_ADDR(x) "+m" (*(volatile long *) (x)) -#endif +#define RLONG_ADDR(x) "m" (*(volatile long *) (x)) +#define WBYTE_ADDR(x) "+m" (*(volatile char *) (x)) -#define ADDR BITOP_ADDR(addr) +#define ADDR RLONG_ADDR(addr) /* * We do the locked ops that don't return the old value as * a mask operation on a byte. */ -#define IS_IMMEDIATE(nr) (__builtin_constant_p(nr)) -#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3)) +#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3)) #define CONST_MASK(nr) (1 << ((nr) & 7)) -/** - * set_bit - Atomically set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * This function is atomic and may not be reordered. See __set_bit() - * if you do not require the atomic guarantees. - * - * Note: there are no guarantees that this function will not be reordered - * on non x86 architectures, so if you are writing portable code, - * make sure not to rely on its reordering guarantees. - * - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. 
- */ static __always_inline void -set_bit(long nr, volatile unsigned long *addr) +arch_set_bit(long nr, volatile unsigned long *addr) { - if (IS_IMMEDIATE(nr)) { - asm volatile(LOCK_PREFIX "orb %1,%0" + if (__builtin_constant_p(nr)) { + asm_inline volatile(LOCK_PREFIX "orb %b1,%0" : CONST_MASK_ADDR(nr, addr) - : "iq" ((u8)CONST_MASK(nr)) + : "iq" (CONST_MASK(nr)) : "memory"); } else { - asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" - : BITOP_ADDR(addr) : "Ir" (nr) : "memory"); + asm_inline volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" + : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } -/** - * __set_bit - Set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * Unlike set_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static __always_inline void __set_bit(long nr, volatile unsigned long *addr) +static __always_inline void +arch___set_bit(unsigned long nr, volatile unsigned long *addr) { - asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory"); + asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } -/** - * clear_bit - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * clear_bit() is atomic and may not be reordered. However, it does - * not contain a memory barrier, so if it is used for locking purposes, - * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic() - * in order to ensure changes are visible on other processors. - */ static __always_inline void -clear_bit(long nr, volatile unsigned long *addr) +arch_clear_bit(long nr, volatile unsigned long *addr) { - if (IS_IMMEDIATE(nr)) { - asm volatile(LOCK_PREFIX "andb %1,%0" + if (__builtin_constant_p(nr)) { + asm_inline volatile(LOCK_PREFIX "andb %b1,%0" : CONST_MASK_ADDR(nr, addr) - : "iq" ((u8)~CONST_MASK(nr))); + : "iq" (~CONST_MASK(nr))); } else { - asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" - : BITOP_ADDR(addr) - : "Ir" (nr)); + asm_inline volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" + : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } -/* - * clear_bit_unlock - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * clear_bit() is atomic and implies release semantics before the memory - * operation. It can be used for an unlock. 
- */ -static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr) +static __always_inline void +arch_clear_bit_unlock(long nr, volatile unsigned long *addr) { barrier(); - clear_bit(nr, addr); + arch_clear_bit(nr, addr); } -static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) +static __always_inline void +arch___clear_bit(unsigned long nr, volatile unsigned long *addr) { - asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr)); + asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } -static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) +static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, + volatile unsigned long *addr) { bool negative; - asm volatile(LOCK_PREFIX "andb %2,%1" - CC_SET(s) - : CC_OUT(s) (negative), ADDR - : "ir" ((char) ~(1 << nr)) : "memory"); + asm_inline volatile(LOCK_PREFIX "xorb %2,%1" + : "=@ccs" (negative), WBYTE_ADDR(addr) + : "iq" ((char)mask) : "memory"); return negative; } +#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte -// Let everybody know we have it -#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte - -/* - * __clear_bit_unlock - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * __clear_bit() is non-atomic and implies release semantics before the memory - * operation. It can be used for an unlock if no other CPUs can concurrently - * modify other bits in the word. - * - * No memory barrier is required here, because x86 cannot reorder stores past - * older loads. Same principle as spin_unlock. - */ -static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) +static __always_inline void +arch___clear_bit_unlock(long nr, volatile unsigned long *addr) { - barrier(); - __clear_bit(nr, addr); + arch___clear_bit(nr, addr); } -/** - * __change_bit - Toggle a bit in memory - * @nr: the bit to change - * @addr: the address to start counting from - * - * Unlike change_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static __always_inline void __change_bit(long nr, volatile unsigned long *addr) +static __always_inline void +arch___change_bit(unsigned long nr, volatile unsigned long *addr) { - asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr)); + asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } -/** - * change_bit - Toggle a bit in memory - * @nr: Bit to change - * @addr: Address to start counting from - * - * change_bit() is atomic and may not be reordered. - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. 
- */ -static __always_inline void change_bit(long nr, volatile unsigned long *addr) +static __always_inline void +arch_change_bit(long nr, volatile unsigned long *addr) { - if (IS_IMMEDIATE(nr)) { - asm volatile(LOCK_PREFIX "xorb %1,%0" + if (__builtin_constant_p(nr)) { + asm_inline volatile(LOCK_PREFIX "xorb %b1,%0" : CONST_MASK_ADDR(nr, addr) - : "iq" ((u8)CONST_MASK(nr))); + : "iq" (CONST_MASK(nr))); } else { - asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" - : BITOP_ADDR(addr) - : "Ir" (nr)); + asm_inline volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" + : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } -/** - * test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. - */ -static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch_test_and_set_bit(long nr, volatile unsigned long *addr) { return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", nr); } -/** - * test_and_set_bit_lock - Set a bit and return its old value for lock - * @nr: Bit to set - * @addr: Address to count from - * - * This is the same as test_and_set_bit on x86. - */ static __always_inline bool -test_and_set_bit_lock(long nr, volatile unsigned long *addr) +arch_test_and_set_bit_lock(long nr, volatile unsigned long *addr) { - return test_and_set_bit(nr, addr); + return arch_test_and_set_bit(nr, addr); } -/** - * __test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - */ -static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr) { bool oldbit; asm(__ASM_SIZE(bts) " %2,%1" - CC_SET(c) - : CC_OUT(c) (oldbit), ADDR - : "Ir" (nr)); + : "=@ccc" (oldbit) + : ADDR, "Ir" (nr) : "memory"); return oldbit; } -/** - * test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. - */ -static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch_test_and_clear_bit(long nr, volatile unsigned long *addr) { return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr); } -/** - * __test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - * +/* * Note: the operation is performed atomically with respect to * the local CPU, but not other CPUs. Portable code should not * rely on this behaviour. 
@@ -282,39 +167,31 @@ static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long * * accessed from a hypervisor on the same CPU if running in a VM: don't change * this without also updating arch/x86/kernel/kvm.c */ -static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) { bool oldbit; asm volatile(__ASM_SIZE(btr) " %2,%1" - CC_SET(c) - : CC_OUT(c) (oldbit), ADDR - : "Ir" (nr)); + : "=@ccc" (oldbit) + : ADDR, "Ir" (nr) : "memory"); return oldbit; } -/* WARNING: non atomic and it can be reordered! */ -static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) { bool oldbit; asm volatile(__ASM_SIZE(btc) " %2,%1" - CC_SET(c) - : CC_OUT(c) (oldbit), ADDR - : "Ir" (nr) : "memory"); + : "=@ccc" (oldbit) + : ADDR, "Ir" (nr) : "memory"); return oldbit; } -/** - * test_and_change_bit - Change a bit and return its old value - * @nr: Bit to change - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. - */ -static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch_test_and_change_bit(long nr, volatile unsigned long *addr) { return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr); } @@ -325,31 +202,51 @@ static __always_inline bool constant_test_bit(long nr, const volatile unsigned l (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + : "=@ccnz" (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; asm volatile(__ASM_SIZE(bt) " %2,%1" - CC_SET(c) - : CC_OUT(c) (oldbit) - : "m" (*(unsigned long *)addr), "Ir" (nr)); + : "=@ccc" (oldbit) + : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory"); return oldbit; } -#if 0 /* Fool kernel-doc since it doesn't do macros yet */ -/** - * test_bit - Determine whether a bit is set - * @nr: bit number to test - * @addr: Address to start counting from - */ -static bool test_bit(int nr, const volatile unsigned long *addr); -#endif +static __always_inline bool +arch_test_bit(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit(nr, addr) : + variable_test_bit(nr, addr); +} -#define test_bit(nr, addr) \ - (__builtin_constant_p((nr)) \ - ? constant_test_bit((nr), (addr)) \ - : variable_test_bit((nr), (addr))) +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + +static __always_inline __attribute_const__ unsigned long variable__ffs(unsigned long word) +{ + asm("tzcnt %1,%0" + : "=r" (word) + : ASM_INPUT_RM (word)); + return word; +} /** * __ffs - find first set bit in word @@ -357,12 +254,14 @@ static bool test_bit(int nr, const volatile unsigned long *addr); * * Undefined if no bit exists, so code should check against 0 first. 
*/ -static __always_inline unsigned long __ffs(unsigned long word) +#define __ffs(word) \ + (__builtin_constant_p(word) ? \ + (unsigned long)__builtin_ctzl(word) : \ + variable__ffs(word)) + +static __always_inline __attribute_const__ unsigned long variable_ffz(unsigned long word) { - asm("rep; bsf %1,%0" - : "=r" (word) - : "rm" (word)); - return word; + return variable__ffs(~word); } /** @@ -371,13 +270,10 @@ static __always_inline unsigned long __ffs(unsigned long word) * * Undefined if no zero exists, so code should check against ~0UL first. */ -static __always_inline unsigned long ffz(unsigned long word) -{ - asm("rep; bsf %1,%0" - : "=r" (word) - : "r" (~word)); - return word; -} +#define ffz(word) \ + (__builtin_constant_p(word) ? \ + (unsigned long)__builtin_ctzl(~word) : \ + variable_ffz(word)) /* * __fls: find last set bit in word @@ -385,29 +281,21 @@ static __always_inline unsigned long ffz(unsigned long word) * * Undefined if no set bit exists, so code should check against 0 first. */ -static __always_inline unsigned long __fls(unsigned long word) +static __always_inline __attribute_const__ unsigned long __fls(unsigned long word) { + if (__builtin_constant_p(word)) + return BITS_PER_LONG - 1 - __builtin_clzl(word); + asm("bsr %1,%0" : "=r" (word) - : "rm" (word)); + : ASM_INPUT_RM (word)); return word; } #undef ADDR #ifdef __KERNEL__ -/** - * ffs - find first set bit in word - * @x: the word to search - * - * This is defined the same way as the libc and compiler builtin ffs - * routines, therefore differs in spirit from the other bitops. - * - * ffs(value) returns 0 if value is 0 or the position of the first - * set bit if value is nonzero. The first (least significant) bit - * is at position 1. - */ -static __always_inline int ffs(int x) +static __always_inline __attribute_const__ int variable_ffs(int x) { int r; @@ -423,7 +311,7 @@ static __always_inline int ffs(int x) */ asm("bsfl %1,%0" : "=r" (r) - : "rm" (x), "0" (-1)); + : ASM_INPUT_RM (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsfl %1,%0\n\t" "cmovzl %2,%0" @@ -438,6 +326,19 @@ static __always_inline int ffs(int x) } /** + * ffs - find first set bit in word + * @x: the word to search + * + * This is defined the same way as the libc and compiler builtin ffs + * routines, therefore differs in spirit from the other bitops. + * + * ffs(value) returns 0 if value is 0 or the position of the first + * set bit if value is nonzero. The first (least significant) bit + * is at position 1. + */ +#define ffs(x) (__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x)) + +/** * fls - find last set bit in word * @x: the word to search * @@ -448,10 +349,13 @@ static __always_inline int ffs(int x) * set bit if value is nonzero. The last (most significant) bit is * at position 32. */ -static __always_inline int fls(unsigned int x) +static __always_inline __attribute_const__ int fls(unsigned int x) { int r; + if (__builtin_constant_p(x)) + return x ? 32 - __builtin_clz(x) : 0; + #ifdef CONFIG_X86_64 /* * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the @@ -464,7 +368,7 @@ static __always_inline int fls(unsigned int x) */ asm("bsrl %1,%0" : "=r" (r) - : "rm" (x), "0" (-1)); + : ASM_INPUT_RM (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsrl %1,%0\n\t" "cmovzl %2,%0" @@ -490,9 +394,12 @@ static __always_inline int fls(unsigned int x) * at position 64. 
 */
 #ifdef CONFIG_X86_64
-static __always_inline int fls64(__u64 x)
+static __always_inline __attribute_const__ int fls64(__u64 x)
 {
 	int bitpos = -1;
+
+	if (__builtin_constant_p(x))
+		return x ? 64 - __builtin_clzll(x) : 0;
 	/*
 	 * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
 	 * dest reg is undefined if x==0, but their CPU architect says its
 	 * value is written to set it to the same as before.
 	 */
 	asm("bsrq %1,%q0"
 	    : "+r" (bitpos)
-	    : "rm" (x));
+	    : ASM_INPUT_RM (x));
 	return bitpos + 1;
 }
 #else
 #include <asm-generic/bitops/fls64.h>
 #endif
 
-#include <asm-generic/bitops/find.h>
-
 #include <asm-generic/bitops/sched.h>
 
 #include <asm/arch_hweight.h>
 #include <asm-generic/bitops/const_hweight.h>
 
+#include <asm-generic/bitops/instrumented-atomic.h>
+#include <asm-generic/bitops/instrumented-non-atomic.h>
+#include <asm-generic/bitops/instrumented-lock.h>
+
 #include <asm-generic/bitops/le.h>
 
 #include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 680c320363db..f7b67cb73915 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -6,11 +6,6 @@
 #include <asm/pgtable_types.h>
 #include <uapi/asm/boot.h>
 
-/* Physical address where kernel should be loaded. */
-#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
-				+ (CONFIG_PHYSICAL_ALIGN - 1)) \
-				& ~(CONFIG_PHYSICAL_ALIGN - 1))
-
 /* Minimum kernel alignment, as a power of two */
 #ifdef CONFIG_X86_64
 # define MIN_KERNEL_ALIGN_LG2	PMD_SHIFT
@@ -24,35 +19,82 @@
 # error "Invalid value for CONFIG_PHYSICAL_ALIGN"
 #endif
 
-#ifdef CONFIG_KERNEL_BZIP2
+#if defined(CONFIG_KERNEL_BZIP2)
 # define BOOT_HEAP_SIZE	0x400000
-#else /* !CONFIG_KERNEL_BZIP2 */
+#elif defined(CONFIG_KERNEL_ZSTD)
+/*
+ * Zstd needs to allocate the ZSTD_DCtx in order to decompress the kernel.
+ * The ZSTD_DCtx is ~160KB, so set the heap size to 192KB because it is a
+ * round number and to allow some slack.
+ */
+# define BOOT_HEAP_SIZE	0x30000
+#else
 # define BOOT_HEAP_SIZE	0x10000
 #endif
 
 #ifdef CONFIG_X86_64
 # define BOOT_STACK_SIZE	0x4000
+/*
+ * Used by the decompressor's startup_32() to allocate page tables for identity
+ * mapping of the 4G of RAM in 4-level paging mode:
+ * - 1 level4 table;
+ * - 1 level3 table;
+ * - 4 level2 tables that map everything with 2M pages;
+ *
+ * The additional level5 table needed for 5-level paging is allocated from
+ * trampoline_32bit memory.
+ */
 # define BOOT_INIT_PGT_SIZE	(6*4096)
-# ifdef CONFIG_RANDOMIZE_BASE
+
 /*
- * Assuming all cross the 512GB boundary:
- * 1 page for level4
- * (2+2)*4 pages for kernel, param, cmd_line, and randomized kernel
- * 2 pages for first 2M (video RAM: CONFIG_X86_VERBOSE_BOOTUP).
- * Total is 19 pages.
+ * Total number of page tables kernel_add_identity_map() can allocate,
+ * including page tables consumed by startup_32().
+ *
+ * Worst-case scenario:
+ * - 5-level paging needs 1 level5 table;
+ * - KASLR needs to map kernel, boot_params, cmdline and randomized kernel,
+ *   assuming all of them cross the 256T boundary:
+ *   + 4*2 level4 tables;
+ *   + 4*2 level3 tables;
+ *   + 4*2 level2 tables;
+ * - X86_VERBOSE_BOOTUP needs to map the first 2M (video RAM):
+ *   + 1 level4 table;
+ *   + 1 level3 table;
+ *   + 1 level2 table;
+ * Total: 28 tables
+ *
+ * Add 4 spare tables in case the decompressor touches anything beyond what is
+ * accounted above. Warn if it happens.
*/ -# ifdef CONFIG_X86_VERBOSE_BOOTUP -# define BOOT_PGT_SIZE (19*4096) -# else /* !CONFIG_X86_VERBOSE_BOOTUP */ -# define BOOT_PGT_SIZE (17*4096) -# endif -# else /* !CONFIG_RANDOMIZE_BASE */ -# define BOOT_PGT_SIZE BOOT_INIT_PGT_SIZE -# endif +# define BOOT_PGT_SIZE_WARN (28*4096) +# define BOOT_PGT_SIZE (32*4096) #else /* !CONFIG_X86_64 */ # define BOOT_STACK_SIZE 0x1000 #endif +#define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE) + +#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE +#define TRAMPOLINE_32BIT_CODE_SIZE 0xA0 + +#ifndef __ASSEMBLER__ +extern unsigned int output_len; +extern const unsigned long kernel_text_size; +extern const unsigned long kernel_inittext_offset; +extern const unsigned long kernel_inittext_size; +extern const unsigned long kernel_total_size; + +unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, + void (*error)(char *x)); + +extern struct boot_params *boot_params_ptr; +extern unsigned long *trampoline_32bit; +extern const u16 trampoline_ljmp_imm_offset; + +void trampoline_32bit_src(void *trampoline, bool enable_5lvl); + +#endif + #endif /* _ASM_X86_BOOT_H */ diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index f6f6ef436599..d90ae472fb76 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -18,13 +18,27 @@ * Note: efi_info is commonly left uninitialized, but that field has a * private magic, so it is better to leave it unchanged. */ + +#define sizeof_mbr(type, member) ({ sizeof(((type *)0)->member); }) + +#define BOOT_PARAM_PRESERVE(struct_member) \ + { \ + .start = offsetof(struct boot_params, struct_member), \ + .len = sizeof_mbr(struct boot_params, struct_member), \ + } + +struct boot_params_to_save { + unsigned int start; + unsigned int len; +}; + static void sanitize_boot_params(struct boot_params *boot_params) { /* * IMPORTANT NOTE TO BOOTLOADER AUTHORS: do not simply clear * this field. The purpose of this field is to guarantee * compliance with the x86 boot spec located in - * Documentation/x86/boot.txt . That spec says that the + * Documentation/arch/x86/boot.rst . That spec says that the * *whole* structure should be cleared, after which only the * portion defined by struct setup_header (boot_params->hdr) * should be copied in. @@ -35,21 +49,42 @@ static void sanitize_boot_params(struct boot_params *boot_params) * problems again. 
*/ if (boot_params->sentinel) { - /* fields in boot_params are left uninitialized, clear them */ - boot_params->acpi_rsdp_addr = 0; - memset(&boot_params->ext_ramdisk_image, 0, - (char *)&boot_params->efi_info - - (char *)&boot_params->ext_ramdisk_image); - memset(&boot_params->kbd_status, 0, - (char *)&boot_params->hdr - - (char *)&boot_params->kbd_status); - memset(&boot_params->_pad7[0], 0, - (char *)&boot_params->edd_mbr_sig_buffer[0] - - (char *)&boot_params->_pad7[0]); - memset(&boot_params->_pad8[0], 0, - (char *)&boot_params->eddbuf[0] - - (char *)&boot_params->_pad8[0]); - memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9)); + static struct boot_params scratch; + char *bp_base = (char *)boot_params; + char *save_base = (char *)&scratch; + int i; + + const struct boot_params_to_save to_save[] = { + BOOT_PARAM_PRESERVE(screen_info), + BOOT_PARAM_PRESERVE(apm_bios_info), + BOOT_PARAM_PRESERVE(tboot_addr), + BOOT_PARAM_PRESERVE(ist_info), + BOOT_PARAM_PRESERVE(hd0_info), + BOOT_PARAM_PRESERVE(hd1_info), + BOOT_PARAM_PRESERVE(sys_desc_table), + BOOT_PARAM_PRESERVE(olpc_ofw_header), + BOOT_PARAM_PRESERVE(efi_info), + BOOT_PARAM_PRESERVE(alt_mem_k), + BOOT_PARAM_PRESERVE(scratch), + BOOT_PARAM_PRESERVE(e820_entries), + BOOT_PARAM_PRESERVE(eddbuf_entries), + BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries), + BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer), + BOOT_PARAM_PRESERVE(secure_boot), + BOOT_PARAM_PRESERVE(hdr), + BOOT_PARAM_PRESERVE(e820_table), + BOOT_PARAM_PRESERVE(eddbuf), + BOOT_PARAM_PRESERVE(cc_blob_address), + }; + + memset(&scratch, 0, sizeof(scratch)); + + for (i = 0; i < ARRAY_SIZE(to_save); i++) { + memcpy(save_base + to_save[i].start, + bp_base + to_save[i].start, to_save[i].len); + } + + memcpy(boot_params, save_base, sizeof(*boot_params)); } } diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 6804d6642767..d561a8443c13 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -3,83 +3,193 @@ #define _ASM_X86_BUG_H #include <linux/stringify.h> +#include <linux/instrumentation.h> +#include <linux/objtool.h> +#include <asm/asm.h> + +#ifndef __ASSEMBLY__ +struct bug_entry; +extern void __WARN_trap(struct bug_entry *bug, ...); +#endif /* * Despite that some emulators terminate on UD2, we use it for WARN(). - * - * Since various instruction decoders/specs disagree on the encoding of - * UD0/UD1. */ - -#define ASM_UD0 ".byte 0x0f, 0xff" /* + ModRM (for Intel) */ -#define ASM_UD1 ".byte 0x0f, 0xb9" /* + ModRM */ -#define ASM_UD2 ".byte 0x0f, 0x0b" - -#define INSN_UD0 0xff0f +#define ASM_UD2 _ASM_BYTES(0x0f, 0x0b) #define INSN_UD2 0x0b0f - #define LEN_UD2 2 +#define ASM_UDB _ASM_BYTES(0xd6) +#define INSN_UDB 0xd6 +#define LEN_UDB 1 + +/* + * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit. 
+ */ +#define INSN_ASOP 0x67 +#define INSN_LOCK 0xf0 +#define OPCODE_ESCAPE 0x0f +#define SECOND_BYTE_OPCODE_UD1 0xb9 +#define SECOND_BYTE_OPCODE_UD2 0x0b + +#define BUG_NONE 0xffff +#define BUG_UD2 0xfffe +#define BUG_UD1 0xfffd +#define BUG_UD1_UBSAN 0xfffc +#define BUG_UD1_WARN 0xfffb +#define BUG_UDB 0xffd6 +#define BUG_LOCK 0xfff0 + #ifdef CONFIG_GENERIC_BUG -#ifdef CONFIG_X86_32 -# define __BUG_REL(val) ".long " __stringify(val) +#ifdef CONFIG_DEBUG_BUGVERBOSE +#define __BUG_ENTRY_VERBOSE(file, line) \ + "\t.long " file " - .\t# bug_entry::file\n" \ + "\t.word " line "\t# bug_entry::line\n" #else -# define __BUG_REL(val) ".long " __stringify(val) " - 2b" +#define __BUG_ENTRY_VERBOSE(file, line) #endif -#ifdef CONFIG_DEBUG_BUGVERBOSE +#if defined(CONFIG_X86_64) || defined(CONFIG_DEBUG_BUGVERBOSE_DETAILED) +#define HAVE_ARCH_BUG_FORMAT +#define __BUG_ENTRY_FORMAT(format) \ + "\t.long " format " - .\t# bug_entry::format\n" +#else +#define __BUG_ENTRY_FORMAT(format) +#endif -#define _BUG_FLAGS(ins, flags) \ -do { \ - asm volatile("1:\t" ins "\n" \ - ".pushsection __bug_table,\"aw\"\n" \ - "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \ - "\t" __BUG_REL(%c0) "\t# bug_entry::file\n" \ - "\t.word %c1" "\t# bug_entry::line\n" \ - "\t.word %c2" "\t# bug_entry::flags\n" \ - "\t.org 2b+%c3\n" \ - ".popsection" \ - : : "i" (__FILE__), "i" (__LINE__), \ - "i" (flags), \ - "i" (sizeof(struct bug_entry))); \ -} while (0) +#ifdef CONFIG_X86_64 +#define HAVE_ARCH_BUG_FORMAT_ARGS +#endif -#else /* !CONFIG_DEBUG_BUGVERBOSE */ +#define __BUG_ENTRY(format, file, line, flags) \ + "\t.long 1b - ." "\t# bug_entry::bug_addr\n" \ + __BUG_ENTRY_FORMAT(format) \ + __BUG_ENTRY_VERBOSE(file, line) \ + "\t.word " flags "\t# bug_entry::flags\n" + +#define _BUG_FLAGS_ASM(format, file, line, flags, size, extra) \ + ".pushsection __bug_table,\"aw\"\n\t" \ + ANNOTATE_DATA_SPECIAL "\n\t" \ + "2:\n\t" \ + __BUG_ENTRY(format, file, line, flags) \ + "\t.org 2b + " size "\n" \ + ".popsection\n" \ + extra + +#ifdef CONFIG_DEBUG_BUGVERBOSE_DETAILED +#define WARN_CONDITION_STR(cond_str) cond_str +#else +#define WARN_CONDITION_STR(cond_str) "" +#endif -#define _BUG_FLAGS(ins, flags) \ +#define _BUG_FLAGS(cond_str, ins, flags, extra) \ do { \ - asm volatile("1:\t" ins "\n" \ - ".pushsection __bug_table,\"aw\"\n" \ - "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \ - "\t.word %c0" "\t# bug_entry::flags\n" \ - "\t.org 2b+%c1\n" \ - ".popsection" \ - : : "i" (flags), \ - "i" (sizeof(struct bug_entry))); \ + asm_inline volatile("1:\t" ins "\n" \ + _BUG_FLAGS_ASM("%c[fmt]", "%c[file]", \ + "%c[line]", "%c[fl]", \ + "%c[size]", extra) \ + : : [fmt] "i" (WARN_CONDITION_STR(cond_str)), \ + [file] "i" (__FILE__), \ + [line] "i" (__LINE__), \ + [fl] "i" (flags), \ + [size] "i" (sizeof(struct bug_entry))); \ } while (0) -#endif /* CONFIG_DEBUG_BUGVERBOSE */ +#define ARCH_WARN_ASM(file, line, flags, size) \ + ".pushsection .rodata.str1.1, \"aMS\", @progbits, 1\n" \ + "99:\n" \ + "\t.string \"\"\n" \ + ".popsection\n" \ + "1:\t " ASM_UD2 "\n" \ + _BUG_FLAGS_ASM("99b", file, line, flags, size, "") #else -#define _BUG_FLAGS(ins, flags) asm volatile(ins) +#define _BUG_FLAGS(cond_str, ins, flags, extra) asm volatile(ins) #endif /* CONFIG_GENERIC_BUG */ #define HAVE_ARCH_BUG #define BUG() \ do { \ - _BUG_FLAGS(ASM_UD2, 0); \ - unreachable(); \ + instrumentation_begin(); \ + _BUG_FLAGS("", ASM_UD2, 0, ""); \ + __builtin_unreachable(); \ } while (0) -#define __WARN_FLAGS(flags) \ -do { \ - _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags)); \ - 
annotate_reachable(); \ +/* + * This instrumentation_begin() is strictly speaking incorrect; but it + * suppresses the complaints from WARN()s in noinstr code. If such a WARN() + * were to trigger, we'd rather wreck the machine in an attempt to get the + * message out than not know about it. + */ + +#define ARCH_WARN_REACHABLE ANNOTATE_REACHABLE(1b) + +#define __WARN_FLAGS(cond_str, flags) \ +do { \ + auto __flags = BUGFLAG_WARNING|(flags); \ + instrumentation_begin(); \ + _BUG_FLAGS(cond_str, ASM_UD2, __flags, ARCH_WARN_REACHABLE); \ + instrumentation_end(); \ +} while (0) + +#ifdef HAVE_ARCH_BUG_FORMAT_ARGS + +#ifndef __ASSEMBLY__ +#include <linux/static_call_types.h> +DECLARE_STATIC_CALL(WARN_trap, __WARN_trap); + +struct pt_regs; +struct sysv_va_list { /* from AMD64 System V ABI */ + unsigned int gp_offset; + unsigned int fp_offset; + void *overflow_arg_area; + void *reg_save_area; +}; +struct arch_va_list { + unsigned long regs[6]; + struct sysv_va_list args; +}; +extern void *__warn_args(struct arch_va_list *args, struct pt_regs *regs); +#endif /* __ASSEMBLY__ */ + +#define __WARN_bug_entry(flags, format) ({ \ + struct bug_entry *bug; \ + asm_inline volatile("lea (2f)(%%rip), %[addr]\n1:\n" \ + _BUG_FLAGS_ASM("%c[fmt]", "%c[file]", \ + "%c[line]", "%c[fl]", \ + "%c[size]", "") \ + : [addr] "=r" (bug) \ + : [fmt] "i" (format), \ + [file] "i" (__FILE__), \ + [line] "i" (__LINE__), \ + [fl] "i" (flags), \ + [size] "i" (sizeof(struct bug_entry))); \ + bug; }) + +#define __WARN_print_arg(flags, format, arg...) \ +do { \ + int __flags = (flags) | BUGFLAG_WARNING | BUGFLAG_ARGS ; \ + static_call_mod(WARN_trap)(__WARN_bug_entry(__flags, format), ## arg); \ + asm (""); /* inhibit tail-call optimization */ \ } while (0) +#define __WARN_printf(taint, fmt, arg...) \ + __WARN_print_arg(BUGFLAG_TAINT(taint), fmt, ## arg) + +#define WARN_ONCE(cond, format, arg...) 
({ \ + int __ret_warn_on = !!(cond); \ + if (unlikely(__ret_warn_on)) { \ + __WARN_print_arg(BUGFLAG_ONCE|BUGFLAG_TAINT(TAINT_WARN),\ + format, ## arg); \ + } \ + __ret_warn_on; \ +}) + +#endif /* HAVE_ARCH_BUG_FORMAT_ARGS */ + #include <asm-generic/bug.h> #endif /* _ASM_X86_BUG_H */ diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h index 542509b53e0f..f25ca2d709d4 100644 --- a/arch/x86/include/asm/bugs.h +++ b/arch/x86/include/asm/bugs.h @@ -4,18 +4,12 @@ #include <asm/processor.h> -extern void check_bugs(void); - -#if defined(CONFIG_CPU_SUP_INTEL) -void check_mpx_erratum(struct cpuinfo_x86 *c); -#else -static inline void check_mpx_erratum(struct cpuinfo_x86 *c) {} -#endif - #if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32) int ppro_with_ram_bug(void); #else static inline int ppro_with_ram_bug(void) { return 0; } #endif +extern void cpu_bugs_smt_update(void); + #endif /* _ASM_X86_BUGS_H */ diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h index abe08690a887..69404eae9983 100644 --- a/arch/x86/include/asm/cache.h +++ b/arch/x86/include/asm/cache.h @@ -8,7 +8,7 @@ #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) +#define __read_mostly __section(".data..read_mostly") #define INTERNODE_CACHE_SHIFT CONFIG_X86_INTERNODE_CACHE_SHIFT #define INTERNODE_CACHE_BYTES (1 << INTERNODE_CACHE_SHIFT) diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 63feaf2a5f93..b192d917a6d0 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_CACHEFLUSH_H #define _ASM_X86_CACHEFLUSH_H +#include <linux/mm.h> + /* Caches aren't brain-dead on the intel. */ #include <asm-generic/cacheflush.h> #include <asm/special_insns.h> diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h index 86b63c7feab7..5aa061199866 100644 --- a/arch/x86/include/asm/cacheinfo.h +++ b/arch/x86/include/asm/cacheinfo.h @@ -2,7 +2,17 @@ #ifndef _ASM_X86_CACHEINFO_H #define _ASM_X86_CACHEINFO_H -void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id); -void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id); +/* Kernel controls MTRR and/or PAT MSRs. */ +extern unsigned int memory_caching_control; +#define CACHE_MTRR 0x01 +#define CACHE_PAT 0x02 + +void cache_disable(void); +void cache_enable(void); +void set_cache_aps_delayed_init(bool val); +bool get_cache_aps_delayed_init(void); +void cache_bp_init(void); +void cache_bp_restore(void); +void cache_aps_init(void); #endif /* _ASM_X86_CACHEINFO_H */ diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h deleted file mode 100644 index a8303ebe089f..000000000000 --- a/arch/x86/include/asm/calgary.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Derived from include/asm-powerpc/iommu.h - * - * Copyright IBM Corporation, 2006-2007 - * - * Author: Jon Mason <jdmason@us.ibm.com> - * Author: Muli Ben-Yehuda <muli@il.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _ASM_X86_CALGARY_H -#define _ASM_X86_CALGARY_H - -#include <linux/spinlock.h> -#include <linux/device.h> -#include <linux/dma-mapping.h> -#include <linux/timer.h> -#include <asm/types.h> - -struct iommu_table { - const struct cal_chipset_ops *chip_ops; /* chipset specific funcs */ - unsigned long it_base; /* mapped address of tce table */ - unsigned long it_hint; /* Hint for next alloc */ - unsigned long *it_map; /* A simple allocation bitmap for now */ - void __iomem *bbar; /* Bridge BAR */ - u64 tar_val; /* Table Address Register */ - struct timer_list watchdog_timer; - spinlock_t it_lock; /* Protects it_map */ - unsigned int it_size; /* Size of iommu table in entries */ - unsigned char it_busno; /* Bus number this table belongs to */ -}; - -struct cal_chipset_ops { - void (*handle_quirks)(struct iommu_table *tbl, struct pci_dev *dev); - void (*tce_cache_blast)(struct iommu_table *tbl); - void (*dump_error_regs)(struct iommu_table *tbl); -}; - -#define TCE_TABLE_SIZE_UNSPECIFIED ~0 -#define TCE_TABLE_SIZE_64K 0 -#define TCE_TABLE_SIZE_128K 1 -#define TCE_TABLE_SIZE_256K 2 -#define TCE_TABLE_SIZE_512K 3 -#define TCE_TABLE_SIZE_1M 4 -#define TCE_TABLE_SIZE_2M 5 -#define TCE_TABLE_SIZE_4M 6 -#define TCE_TABLE_SIZE_8M 7 - -extern int use_calgary; - -#ifdef CONFIG_CALGARY_IOMMU -extern int detect_calgary(void); -#else -static inline int detect_calgary(void) { return -ENODEV; } -#endif - -#endif /* _ASM_X86_CALGARY_H */ diff --git a/arch/x86/include/asm/ce4100.h b/arch/x86/include/asm/ce4100.h index 2930f560d7f3..e1f965bb1e31 100644 --- a/arch/x86/include/asm/ce4100.h +++ b/arch/x86/include/asm/ce4100.h @@ -4,4 +4,10 @@ int ce4100_pci_init(void); +#ifdef CONFIG_SERIAL_8250 +void __init sdv_serial_fixup(void); +#else +static inline void sdv_serial_fixup(void) {}; +#endif + #endif diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h new file mode 100644 index 000000000000..c40b9ebc1fb4 --- /dev/null +++ b/arch/x86/include/asm/cfi.h @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CFI_H +#define _ASM_X86_CFI_H + +/* + * Clang Control Flow Integrity (CFI) support. + * + * Copyright (C) 2022 Google LLC + */ +#include <linux/bug.h> +#include <asm/ibt.h> + +/* + * An overview of the various calling conventions... + * + * Traditional: + * + * foo: + * ... code here ... + * ret + * + * direct caller: + * call foo + * + * indirect caller: + * lea foo(%rip), %r11 + * ... + * call *%r11 + * + * + * IBT: + * + * foo: + * endbr64 + * ... code here ... + * ret + * + * direct caller: + * call foo / call foo+4 + * + * indirect caller: + * lea foo(%rip), %r11 + * ... + * call *%r11 + * + * + * kCFI: + * + * __cfi_foo: + * movl $0x12345678, %eax + * # 11 nops when CONFIG_CALL_PADDING + * foo: + * endbr64 # when IBT + * ... code here ... + * ret + * + * direct call: + * call foo # / call foo+4 when IBT + * + * indirect call: + * lea foo(%rip), %r11 + * ... 
+ * movl $(-0x12345678), %r10d
+ * addl -4(%r11), %r10d	# -15 when CONFIG_CALL_PADDING
+ * jz 1f
+ * ud2
+ * 1:call *%r11
+ *
+ *
+ * FineIBT (builds as kCFI + CALL_PADDING + IBT + RETPOLINE and runtime patches into):
+ *
+ * __cfi_foo:
+ * endbr64
+ * subl 0x12345678, %eax
+ * jne.32,pn foo+3
+ * foo:
+ * nopl -42(%rax)	# was endbr64
+ * ... code here ...
+ * ret
+ *
+ * direct caller:
+ * call foo / call foo+4
+ *
+ * indirect caller:
+ * lea foo(%rip), %r11
+ * ...
+ * movl $0x12345678, %eax
+ * lea -0x10(%r11), %r11
+ * nop5
+ * call *%r11
+ *
+ */
+enum cfi_mode {
+	CFI_AUTO,	/* FineIBT if hardware has IBT, otherwise kCFI */
+	CFI_OFF,	/* Traditional / IBT depending on .config */
+	CFI_KCFI,	/* Optionally CALL_PADDING, IBT, RETPOLINE */
+	CFI_FINEIBT,	/* see arch/x86/kernel/alternative.c */
+};
+
+extern enum cfi_mode cfi_mode;
+
+#ifdef CONFIG_FINEIBT_BHI
+extern bool cfi_bhi;
+#else
+#define cfi_bhi (0)
+#endif
+
+typedef u8 bhi_thunk[32];
+extern bhi_thunk __bhi_args[];
+extern bhi_thunk __bhi_args_end[];
+
+struct pt_regs;
+
+#ifdef CONFIG_CFI
+enum bug_trap_type handle_cfi_failure(struct pt_regs *regs);
+#define __bpfcall
+
+static inline int cfi_get_offset(void)
+{
+	switch (cfi_mode) {
+	case CFI_FINEIBT:
+		return 16;
+	case CFI_KCFI:
+		if (IS_ENABLED(CONFIG_CALL_PADDING))
+			return 16;
+		return 5;
+	default:
+		return 0;
+	}
+}
+#define cfi_get_offset cfi_get_offset
+
+extern u32 cfi_get_func_hash(void *func);
+#define cfi_get_func_hash cfi_get_func_hash
+
+extern int cfi_get_func_arity(void *func);
+
+#ifdef CONFIG_FINEIBT
+extern bool decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type);
+#else
+static inline bool
+decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type)
+{
+	return false;
+}
+
+#endif
+
+#else
+static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
+{
+	return BUG_TRAP_TYPE_NONE;
+}
+static inline int cfi_get_func_arity(void *func)
+{
+	return 0;
+}
+#endif /* CONFIG_CFI */
+
+#if HAS_KERNEL_IBT == 1
+#define CFI_NOSEAL(x)	asm(IBT_NOSEAL(__stringify(x)))
+#endif
+
+#endif /* _ASM_X86_CFI_H */
diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h
index d79d1e622dcf..6df6ece8a28e 100644
--- a/arch/x86/include/asm/checksum.h
+++ b/arch/x86/include/asm/checksum.h
@@ -1,6 +1,13 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifdef CONFIG_X86_32
-# include <asm/checksum_32.h>
+#ifdef CONFIG_GENERIC_CSUM
+# include <asm-generic/checksum.h>
 #else
-# include <asm/checksum_64.h>
+# define  _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1
+# define HAVE_CSUM_COPY_USER
+# define _HAVE_ARCH_CSUM_AND_COPY
+# ifdef CONFIG_X86_32
+#  include <asm/checksum_32.h>
+# else
+#  include <asm/checksum_64.h>
+# endif
 #endif
diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h
index f57b94e02c57..17da95387997 100644
--- a/arch/x86/include/asm/checksum_32.h
+++ b/arch/x86/include/asm/checksum_32.h
@@ -27,9 +27,7 @@ asmlinkage __wsum csum_partial(const void *buff, int len, __wsum sum);
  * better 64-bit) boundary
  */
 
-asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst,
-					    int len, __wsum sum,
-					    int *src_err_ptr, int *dst_err_ptr);
+asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst, int len);
 
 /*
  * Note: when you get a NULL pointer exception here this means someone
@@ -38,24 +36,21 @@ asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst,
  * If you use these functions directly please don't forget the
  * access_ok().
*/ -static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, - int len, __wsum sum) +static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len) { - return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL); + return csum_partial_copy_generic(src, dst, len); } -static inline __wsum csum_partial_copy_from_user(const void __user *src, - void *dst, - int len, __wsum sum, - int *err_ptr) +static inline __wsum csum_and_copy_from_user(const void __user *src, + void *dst, int len) { __wsum ret; might_sleep(); - stac(); - ret = csum_partial_copy_generic((__force void *)src, dst, - len, sum, err_ptr, NULL); - clac(); + if (!user_access_begin(src, len)) + return 0; + ret = csum_partial_copy_generic((__force void *)src, dst, len); + user_access_end(); return ret; } @@ -173,27 +168,19 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, /* * Copy and checksum to user */ -#define HAVE_CSUM_COPY_USER static inline __wsum csum_and_copy_to_user(const void *src, void __user *dst, - int len, __wsum sum, - int *err_ptr) + int len) { __wsum ret; might_sleep(); - if (access_ok(dst, len)) { - stac(); - ret = csum_partial_copy_generic(src, (__force void *)dst, - len, sum, NULL, err_ptr); - clac(); - return ret; - } - - if (len) - *err_ptr = -EFAULT; - - return (__force __wsum)-1; /* invalid checksum */ + if (!user_access_begin(dst, len)) + return 0; + + ret = csum_partial_copy_generic(src, (__force void *)dst, len); + user_access_end(); + return ret; } #endif /* _ASM_X86_CHECKSUM_32_H */ diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h index 3ec6d3267cf9..4d4a47a3a8ab 100644 --- a/arch/x86/include/asm/checksum_64.h +++ b/arch/x86/include/asm/checksum_64.h @@ -9,7 +9,6 @@ */ #include <linux/compiler.h> -#include <linux/uaccess.h> #include <asm/byteorder.h> /** @@ -129,26 +128,12 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, */ extern __wsum csum_partial(const void *buff, int len, __wsum sum); -#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1 -#define HAVE_CSUM_COPY_USER 1 - - /* Do not call this directly. Use the wrappers below */ -extern __visible __wsum csum_partial_copy_generic(const void *src, const void *dst, - int len, __wsum sum, - int *src_err_ptr, int *dst_err_ptr); - - -extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst, - int len, __wsum isum, int *errp); -extern __wsum csum_partial_copy_to_user(const void *src, void __user *dst, - int len, __wsum isum, int *errp); -extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, - int len, __wsum sum); +extern __visible __wsum csum_partial_copy_generic(const void *src, void *dst, int len); -/* Old names. To be removed. */ -#define csum_and_copy_to_user csum_partial_copy_to_user -#define csum_and_copy_from_user csum_partial_copy_from_user +extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len); +extern __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len); +extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len); /** * ip_compute_csum - Compute an 16bit IP checksum. diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h index dc4cfc888d6d..dc9dc7b3911a 100644 --- a/arch/x86/include/asm/clocksource.h +++ b/arch/x86/include/asm/clocksource.h @@ -4,14 +4,18 @@ #ifndef _ASM_X86_CLOCKSOURCE_H #define _ASM_X86_CLOCKSOURCE_H -#define VCLOCK_NONE 0 /* No vDSO clock available. 
*/ -#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ -#define VCLOCK_PVCLOCK 2 /* vDSO should use vread_pvclock. */ -#define VCLOCK_HVCLOCK 3 /* vDSO should use vread_hvclock. */ -#define VCLOCK_MAX 3 +#include <asm/vdso/clocksource.h> -struct arch_clocksource_data { - int vclock_mode; -}; +extern unsigned int vclocks_used; + +static inline bool vclock_was_used(int vclock) +{ + return READ_ONCE(vclocks_used) & (1U << vclock); +} + +static inline void vclocks_set_used(unsigned int which) +{ + WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << which)); +} #endif /* _ASM_X86_CLOCKSOURCE_H */ diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h index 6faaf27e8899..6cbd9ae58b21 100644 --- a/arch/x86/include/asm/cmdline.h +++ b/arch/x86/include/asm/cmdline.h @@ -2,6 +2,10 @@ #ifndef _ASM_X86_CMDLINE_H #define _ASM_X86_CMDLINE_H +#include <asm/setup.h> + +extern char builtin_cmdline[COMMAND_LINE_SIZE]; + int cmdline_find_option_bool(const char *cmdline_ptr, const char *option); int cmdline_find_option(const char *cmdline_ptr, const char *option, char *buffer, int bufsize); diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index a8bfac131256..a88b06f1c35e 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -22,7 +22,7 @@ extern void __add_wrong_size(void) /* * Constants for operation sizes. On 32-bit, the 64-bit size it set to * -1 because sizeof will never return -1, thereby making those switch - * case statements guaranteeed dead code which the compiler will + * case statements guaranteed dead code which the compiler will * eliminate, and allowing the "missing symbol in the default case" to * indicate a usage error. */ @@ -44,22 +44,22 @@ extern void __add_wrong_size(void) __typeof__ (*(ptr)) __ret = (arg); \ switch (sizeof(*(ptr))) { \ case __X86_CASE_B: \ - asm volatile (lock #op "b %b0, %1\n" \ + asm_inline volatile (lock #op "b %b0, %1" \ : "+q" (__ret), "+m" (*(ptr)) \ : : "memory", "cc"); \ break; \ case __X86_CASE_W: \ - asm volatile (lock #op "w %w0, %1\n" \ + asm_inline volatile (lock #op "w %w0, %1" \ : "+r" (__ret), "+m" (*(ptr)) \ : : "memory", "cc"); \ break; \ case __X86_CASE_L: \ - asm volatile (lock #op "l %0, %1\n" \ + asm_inline volatile (lock #op "l %0, %1" \ : "+r" (__ret), "+m" (*(ptr)) \ : : "memory", "cc"); \ break; \ case __X86_CASE_Q: \ - asm volatile (lock #op "q %q0, %1\n" \ + asm_inline volatile (lock #op "q %q0, %1" \ : "+r" (__ret), "+m" (*(ptr)) \ : : "memory", "cc"); \ break; \ @@ -91,7 +91,7 @@ extern void __add_wrong_size(void) case __X86_CASE_B: \ { \ volatile u8 *__ptr = (volatile u8 *)(ptr); \ - asm volatile(lock "cmpxchgb %2,%1" \ + asm_inline volatile(lock "cmpxchgb %2, %1" \ : "=a" (__ret), "+m" (*__ptr) \ : "q" (__new), "0" (__old) \ : "memory"); \ @@ -100,7 +100,7 @@ extern void __add_wrong_size(void) case __X86_CASE_W: \ { \ volatile u16 *__ptr = (volatile u16 *)(ptr); \ - asm volatile(lock "cmpxchgw %2,%1" \ + asm_inline volatile(lock "cmpxchgw %2, %1" \ : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ @@ -109,7 +109,7 @@ extern void __add_wrong_size(void) case __X86_CASE_L: \ { \ volatile u32 *__ptr = (volatile u32 *)(ptr); \ - asm volatile(lock "cmpxchgl %2,%1" \ + asm_inline volatile(lock "cmpxchgl %2, %1" \ : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ @@ -118,7 +118,7 @@ extern void __add_wrong_size(void) case __X86_CASE_Q: \ { \ volatile u64 *__ptr = (volatile u64 *)(ptr); \ - asm volatile(lock 
"cmpxchgq %2,%1" \ + asm_inline volatile(lock "cmpxchgq %2, %1" \ : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ @@ -134,7 +134,7 @@ extern void __add_wrong_size(void) __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) #define __sync_cmpxchg(ptr, old, new, size) \ - __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") + __raw_cmpxchg((ptr), (old), (new), (size), "lock ") #define __cmpxchg_local(ptr, old, new, size) \ __raw_cmpxchg((ptr), (old), (new), (size), "") @@ -165,9 +165,8 @@ extern void __add_wrong_size(void) case __X86_CASE_B: \ { \ volatile u8 *__ptr = (volatile u8 *)(_ptr); \ - asm volatile(lock "cmpxchgb %[new], %[ptr]" \ - CC_SET(z) \ - : CC_OUT(z) (success), \ + asm_inline volatile(lock "cmpxchgb %[new], %[ptr]" \ + : "=@ccz" (success), \ [ptr] "+m" (*__ptr), \ [old] "+a" (__old) \ : [new] "q" (__new) \ @@ -177,9 +176,8 @@ extern void __add_wrong_size(void) case __X86_CASE_W: \ { \ volatile u16 *__ptr = (volatile u16 *)(_ptr); \ - asm volatile(lock "cmpxchgw %[new], %[ptr]" \ - CC_SET(z) \ - : CC_OUT(z) (success), \ + asm_inline volatile(lock "cmpxchgw %[new], %[ptr]" \ + : "=@ccz" (success), \ [ptr] "+m" (*__ptr), \ [old] "+a" (__old) \ : [new] "r" (__new) \ @@ -189,9 +187,8 @@ extern void __add_wrong_size(void) case __X86_CASE_L: \ { \ volatile u32 *__ptr = (volatile u32 *)(_ptr); \ - asm volatile(lock "cmpxchgl %[new], %[ptr]" \ - CC_SET(z) \ - : CC_OUT(z) (success), \ + asm_inline volatile(lock "cmpxchgl %[new], %[ptr]" \ + : "=@ccz" (success), \ [ptr] "+m" (*__ptr), \ [old] "+a" (__old) \ : [new] "r" (__new) \ @@ -201,9 +198,8 @@ extern void __add_wrong_size(void) case __X86_CASE_Q: \ { \ volatile u64 *__ptr = (volatile u64 *)(_ptr); \ - asm volatile(lock "cmpxchgq %[new], %[ptr]" \ - CC_SET(z) \ - : CC_OUT(z) (success), \ + asm_inline volatile(lock "cmpxchgq %[new], %[ptr]" \ + : "=@ccz" (success), \ [ptr] "+m" (*__ptr), \ [old] "+a" (__old) \ : [new] "r" (__new) \ @@ -221,9 +217,21 @@ extern void __add_wrong_size(void) #define __try_cmpxchg(ptr, pold, new, size) \ __raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX) -#define try_cmpxchg(ptr, pold, new) \ +#define __sync_try_cmpxchg(ptr, pold, new, size) \ + __raw_try_cmpxchg((ptr), (pold), (new), (size), "lock ") + +#define __try_cmpxchg_local(ptr, pold, new, size) \ + __raw_try_cmpxchg((ptr), (pold), (new), (size), "") + +#define arch_try_cmpxchg(ptr, pold, new) \ __try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr))) +#define arch_sync_try_cmpxchg(ptr, pold, new) \ + __sync_try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr))) + +#define arch_try_cmpxchg_local(ptr, pold, new) \ + __try_cmpxchg_local((ptr), (pold), (new), sizeof(*(ptr))) + /* * xadd() adds "inc" to "*ptr" and atomically returns the previous * value of "*ptr". 
@@ -233,29 +241,4 @@ extern void __add_wrong_size(void) #define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock) #define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX) -#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \ -({ \ - bool __ret; \ - __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \ - __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \ - BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \ - BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ - VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \ - VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \ - asm volatile(pfx "cmpxchg%c5b %1" \ - CC_SET(e) \ - : CC_OUT(e) (__ret), \ - "+m" (*(p1)), "+m" (*(p2)), \ - "+a" (__old1), "+d" (__old2) \ - : "i" (2 * sizeof(long)), \ - "b" (__new1), "c" (__new2)); \ - __ret; \ -}) - -#define arch_cmpxchg_double(p1, p2, o1, o2, n1, n2) \ - __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2) - -#define arch_cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ - __cmpxchg_double(, p1, p2, o1, o2, n1, n2) - #endif /* ASM_X86_CMPXCHG_H */ diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 1a2eafca7038..1f80a62be969 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -3,113 +3,153 @@ #define _ASM_X86_CMPXCHG_32_H /* - * Note: if you use set64_bit(), __cmpxchg64(), or their variants, you + * Note: if you use __cmpxchg64(), or their variants, * you need to test for the feature in boot_cpu_data. */ -/* - * CMPXCHG8B only writes to the target if we had the previous - * value in registers, otherwise it acts as a read and gives us the - * "new previous" value. That is why there is a loop. Preloading - * EDX:EAX is a performance optimization: in the common case it means - * we need only one locked operation. - * - * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very - * least an FPU save and/or %cr0.ts manipulation. - * - * cmpxchg8b must be used with the lock prefix here to allow the - * instruction to be executed atomically. We need to have the reader - * side to see the coherent 64bit value. 
- */ -static inline void set_64bit(volatile u64 *ptr, u64 value) +union __u64_halves { + u64 full; + struct { + u32 low, high; + }; +}; + +#define __arch_cmpxchg64(_ptr, _old, _new, _lock) \ +({ \ + union __u64_halves o = { .full = (_old), }, \ + n = { .full = (_new), }; \ + \ + asm_inline volatile(_lock "cmpxchg8b %[ptr]" \ + : [ptr] "+m" (*(_ptr)), \ + "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high) \ + : "memory"); \ + \ + o.full; \ +}) + + +static __always_inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new) { - u32 low = value; - u32 high = value >> 32; - u64 prev = *ptr; - - asm volatile("\n1:\t" - LOCK_PREFIX "cmpxchg8b %0\n\t" - "jnz 1b" - : "=m" (*ptr), "+A" (prev) - : "b" (low), "c" (high) - : "memory"); + return __arch_cmpxchg64(ptr, old, new, LOCK_PREFIX); } -#ifdef CONFIG_X86_CMPXCHG64 -#define arch_cmpxchg64(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \ - (unsigned long long)(n))) -#define arch_cmpxchg64_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \ - (unsigned long long)(n))) -#endif +static __always_inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) +{ + return __arch_cmpxchg64(ptr, old, new,); +} + +#define __arch_try_cmpxchg64(_ptr, _oldp, _new, _lock) \ +({ \ + union __u64_halves o = { .full = *(_oldp), }, \ + n = { .full = (_new), }; \ + bool ret; \ + \ + asm_inline volatile(_lock "cmpxchg8b %[ptr]" \ + : "=@ccz" (ret), \ + [ptr] "+m" (*(_ptr)), \ + "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high) \ + : "memory"); \ + \ + if (unlikely(!ret)) \ + *(_oldp) = o.full; \ + \ + likely(ret); \ +}) -static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new) +static __always_inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new) { - u64 prev; - asm volatile(LOCK_PREFIX "cmpxchg8b %1" - : "=A" (prev), - "+m" (*ptr) - : "b" ((u32)new), - "c" ((u32)(new >> 32)), - "0" (old) - : "memory"); - return prev; + return __arch_try_cmpxchg64(ptr, oldp, new, LOCK_PREFIX); } -static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) +static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new) { - u64 prev; - asm volatile("cmpxchg8b %1" - : "=A" (prev), - "+m" (*ptr) - : "b" ((u32)new), - "c" ((u32)(new >> 32)), - "0" (old) - : "memory"); - return prev; + return __arch_try_cmpxchg64(ptr, oldp, new,); } -#ifndef CONFIG_X86_CMPXCHG64 +#ifdef CONFIG_X86_CX8 + +#define arch_cmpxchg64 __cmpxchg64 + +#define arch_cmpxchg64_local __cmpxchg64_local + +#define arch_try_cmpxchg64 __try_cmpxchg64 + +#define arch_try_cmpxchg64_local __try_cmpxchg64_local + +#else + /* * Building a kernel capable running on 80386 and 80486. It may be necessary * to simulate the cmpxchg8b on the 80386 and 80486 CPU. 
*/ -#define arch_cmpxchg64(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __typeof__(*(ptr)) __old = (o); \ - __typeof__(*(ptr)) __new = (n); \ - alternative_io(LOCK_PREFIX_HERE \ - "call cmpxchg8b_emu", \ - "lock; cmpxchg8b (%%esi)" , \ - X86_FEATURE_CX8, \ - "=A" (__ret), \ - "S" ((ptr)), "0" (__old), \ - "b" ((unsigned int)__new), \ - "c" ((unsigned int)(__new>>32)) \ - : "memory"); \ - __ret; }) - - -#define arch_cmpxchg64_local(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __typeof__(*(ptr)) __old = (o); \ - __typeof__(*(ptr)) __new = (n); \ - alternative_io("call cmpxchg8b_emu", \ - "cmpxchg8b (%%esi)" , \ - X86_FEATURE_CX8, \ - "=A" (__ret), \ - "S" ((ptr)), "0" (__old), \ - "b" ((unsigned int)__new), \ - "c" ((unsigned int)(__new>>32)) \ - : "memory"); \ - __ret; }) +#define __arch_cmpxchg64_emu(_ptr, _old, _new, _lock_loc, _lock) \ +({ \ + union __u64_halves o = { .full = (_old), }, \ + n = { .full = (_new), }; \ + \ + asm_inline volatile( \ + ALTERNATIVE(_lock_loc \ + "call cmpxchg8b_emu", \ + _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ + : ALT_OUTPUT_SP("+a" (o.low), "+d" (o.high)) \ + : "b" (n.low), "c" (n.high), \ + [ptr] "S" (_ptr) \ + : "memory"); \ + \ + o.full; \ +}) + +static __always_inline u64 arch_cmpxchg64(volatile u64 *ptr, u64 old, u64 new) +{ + return __arch_cmpxchg64_emu(ptr, old, new, LOCK_PREFIX_HERE, "lock "); +} +#define arch_cmpxchg64 arch_cmpxchg64 + +static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) +{ + return __arch_cmpxchg64_emu(ptr, old, new, ,); +} +#define arch_cmpxchg64_local arch_cmpxchg64_local + +#define __arch_try_cmpxchg64_emu(_ptr, _oldp, _new, _lock_loc, _lock) \ +({ \ + union __u64_halves o = { .full = *(_oldp), }, \ + n = { .full = (_new), }; \ + bool ret; \ + \ + asm_inline volatile( \ + ALTERNATIVE(_lock_loc \ + "call cmpxchg8b_emu", \ + _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ + : ALT_OUTPUT_SP("=@ccz" (ret), \ + "+a" (o.low), "+d" (o.high)) \ + : "b" (n.low), "c" (n.high), \ + [ptr] "S" (_ptr) \ + : "memory"); \ + \ + if (unlikely(!ret)) \ + *(_oldp) = o.full; \ + \ + likely(ret); \ +}) + +static __always_inline bool arch_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new) +{ + return __arch_try_cmpxchg64_emu(ptr, oldp, new, LOCK_PREFIX_HERE, "lock "); +} +#define arch_try_cmpxchg64 arch_try_cmpxchg64 + +static __always_inline bool arch_try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new) +{ + return __arch_try_cmpxchg64_emu(ptr, oldp, new, ,); +} +#define arch_try_cmpxchg64_local arch_try_cmpxchg64_local #endif -#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX8) +#define system_has_cmpxchg64() boot_cpu_has(X86_FEATURE_CX8) #endif /* _ASM_X86_CMPXCHG_32_H */ diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 072e5459fe2f..5afea056fb20 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -2,11 +2,6 @@ #ifndef _ASM_X86_CMPXCHG_64_H #define _ASM_X86_CMPXCHG_64_H -static inline void set_64bit(volatile u64 *ptr, u64 val) -{ - *ptr = val; -} - #define arch_cmpxchg64(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ @@ -19,6 +14,82 @@ static inline void set_64bit(volatile u64 *ptr, u64 val) arch_cmpxchg_local((ptr), (o), (n)); \ }) -#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16) +#define arch_try_cmpxchg64(ptr, po, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + arch_try_cmpxchg((ptr), (po), (n)); \ +}) + +#define arch_try_cmpxchg64_local(ptr, po, n) \ +({ \ + 
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + arch_try_cmpxchg_local((ptr), (po), (n)); \ +}) + +union __u128_halves { + u128 full; + struct { + u64 low, high; + }; +}; + +#define __arch_cmpxchg128(_ptr, _old, _new, _lock) \ +({ \ + union __u128_halves o = { .full = (_old), }, \ + n = { .full = (_new), }; \ + \ + asm_inline volatile(_lock "cmpxchg16b %[ptr]" \ + : [ptr] "+m" (*(_ptr)), \ + "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high) \ + : "memory"); \ + \ + o.full; \ +}) + +static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new) +{ + return __arch_cmpxchg128(ptr, old, new, LOCK_PREFIX); +} +#define arch_cmpxchg128 arch_cmpxchg128 + +static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old, u128 new) +{ + return __arch_cmpxchg128(ptr, old, new,); +} +#define arch_cmpxchg128_local arch_cmpxchg128_local + +#define __arch_try_cmpxchg128(_ptr, _oldp, _new, _lock) \ +({ \ + union __u128_halves o = { .full = *(_oldp), }, \ + n = { .full = (_new), }; \ + bool ret; \ + \ + asm_inline volatile(_lock "cmpxchg16b %[ptr]" \ + : "=@ccz" (ret), \ + [ptr] "+m" (*(_ptr)), \ + "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high) \ + : "memory"); \ + \ + if (unlikely(!ret)) \ + *(_oldp) = o.full; \ + \ + likely(ret); \ +}) + +static __always_inline bool arch_try_cmpxchg128(volatile u128 *ptr, u128 *oldp, u128 new) +{ + return __arch_try_cmpxchg128(ptr, oldp, new, LOCK_PREFIX); +} +#define arch_try_cmpxchg128 arch_try_cmpxchg128 + +static __always_inline bool arch_try_cmpxchg128_local(volatile u128 *ptr, u128 *oldp, u128 new) +{ + return __arch_try_cmpxchg128(ptr, oldp, new,); +} +#define arch_try_cmpxchg128_local arch_try_cmpxchg128_local + +#define system_has_cmpxchg128() boot_cpu_has(X86_FEATURE_CX16) #endif /* _ASM_X86_CMPXCHG_64_H */ diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h new file mode 100644 index 000000000000..e1dbf8df1b69 --- /dev/null +++ b/arch/x86/include/asm/coco.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_COCO_H +#define _ASM_X86_COCO_H + +#include <asm/asm.h> +#include <asm/types.h> + +enum cc_vendor { + CC_VENDOR_NONE, + CC_VENDOR_AMD, + CC_VENDOR_INTEL, +}; + +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM +extern enum cc_vendor cc_vendor; +extern u64 cc_mask; + +static inline u64 cc_get_mask(void) +{ + return cc_mask; +} + +static inline void cc_set_mask(u64 mask) +{ + cc_mask = mask; +} + +u64 cc_mkenc(u64 val); +u64 cc_mkdec(u64 val); +void cc_random_init(void); +#else +#define cc_vendor (CC_VENDOR_NONE) +static inline u64 cc_get_mask(void) +{ + return 0; +} + +static inline u64 cc_mkenc(u64 val) +{ + return val; +} + +static inline u64 cc_mkdec(u64 val) +{ + return val; +} +static inline void cc_random_init(void) { } +#endif + +#endif /* _ASM_X86_COCO_H */ diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 22c4dfe65992..b1221da477b7 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -12,34 +12,35 @@ #include <asm/user32.h> #include <asm/unistd.h> -#include <asm-generic/compat.h> - -#define COMPAT_USER_HZ 100 -#define COMPAT_UTS_MACHINE "i686\0\0" +#define compat_mode_t compat_mode_t +typedef u16 compat_mode_t; +#define __compat_uid_t __compat_uid_t typedef u16 __compat_uid_t; typedef u16 __compat_gid_t; -typedef u32 __compat_uid32_t; -typedef u32 __compat_gid32_t; -typedef u16 compat_mode_t; + +#define compat_dev_t compat_dev_t typedef u16 compat_dev_t; + +#define compat_ipc_pid_t compat_ipc_pid_t +typedef u16 
compat_ipc_pid_t; + +#define compat_statfs compat_statfs + +#include <asm-generic/compat.h> + +#define COMPAT_UTS_MACHINE "i686\0\0" + typedef u16 compat_nlink_t; -typedef u16 compat_ipc_pid_t; -typedef u32 compat_caddr_t; -typedef __kernel_fsid_t compat_fsid_t; -typedef s64 __attribute__((aligned(4))) compat_s64; -typedef u64 __attribute__((aligned(4))) compat_u64; struct compat_stat { - compat_dev_t st_dev; - u16 __pad1; + u32 st_dev; compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; __compat_uid_t st_uid; __compat_gid_t st_gid; - compat_dev_t st_rdev; - u16 __pad2; + u32 st_rdev; u32 st_size; u32 st_blksize; u32 st_blocks; @@ -53,29 +54,11 @@ struct compat_stat { u32 __unused5; }; -struct compat_flock { - short l_type; - short l_whence; - compat_off_t l_start; - compat_off_t l_len; - compat_pid_t l_pid; -}; - -#define F_GETLK64 12 /* using 'struct flock64' */ -#define F_SETLK64 13 -#define F_SETLKW64 14 - /* - * IA32 uses 4 byte alignment for 64 bit quantities, - * so we need to pack this structure. + * IA32 uses 4 byte alignment for 64 bit quantities, so we need to pack the + * compat flock64 structure. */ -struct compat_flock64 { - short l_type; - short l_whence; - compat_loff_t l_start; - compat_loff_t l_len; - compat_pid_t l_pid; -} __attribute__((packed)); +#define __ARCH_NEED_COMPAT_FLOCK64_PACKED struct compat_statfs { int f_type; @@ -92,122 +75,11 @@ struct compat_statfs { int f_spare[4]; }; -#define COMPAT_RLIM_INFINITY 0xffffffff - -typedef u32 compat_old_sigset_t; /* at least 32 bits */ - -#define _COMPAT_NSIG 64 -#define _COMPAT_NSIG_BPW 32 - -typedef u32 compat_sigset_word; - -#define COMPAT_OFF_T_MAX 0x7fffffff - -struct compat_ipc64_perm { - compat_key_t key; - __compat_uid32_t uid; - __compat_gid32_t gid; - __compat_uid32_t cuid; - __compat_gid32_t cgid; - unsigned short mode; - unsigned short __pad1; - unsigned short seq; - unsigned short __pad2; - compat_ulong_t unused1; - compat_ulong_t unused2; -}; - -struct compat_semid64_ds { - struct compat_ipc64_perm sem_perm; - compat_ulong_t sem_otime; - compat_ulong_t sem_otime_high; - compat_ulong_t sem_ctime; - compat_ulong_t sem_ctime_high; - compat_ulong_t sem_nsems; - compat_ulong_t __unused3; - compat_ulong_t __unused4; -}; - -struct compat_msqid64_ds { - struct compat_ipc64_perm msg_perm; - compat_ulong_t msg_stime; - compat_ulong_t msg_stime_high; - compat_ulong_t msg_rtime; - compat_ulong_t msg_rtime_high; - compat_ulong_t msg_ctime; - compat_ulong_t msg_ctime_high; - compat_ulong_t msg_cbytes; - compat_ulong_t msg_qnum; - compat_ulong_t msg_qbytes; - compat_pid_t msg_lspid; - compat_pid_t msg_lrpid; - compat_ulong_t __unused4; - compat_ulong_t __unused5; -}; - -struct compat_shmid64_ds { - struct compat_ipc64_perm shm_perm; - compat_size_t shm_segsz; - compat_ulong_t shm_atime; - compat_ulong_t shm_atime_high; - compat_ulong_t shm_dtime; - compat_ulong_t shm_dtime_high; - compat_ulong_t shm_ctime; - compat_ulong_t shm_ctime_high; - compat_pid_t shm_cpid; - compat_pid_t shm_lpid; - compat_ulong_t shm_nattch; - compat_ulong_t __unused4; - compat_ulong_t __unused5; -}; - -/* - * The type of struct elf_prstatus.pr_reg in compatible core dumps. - */ -typedef struct user_regs_struct compat_elf_gregset_t; - -/* Full regset -- prstatus on x32, otherwise on ia32 */ -#define PRSTATUS_SIZE(S, R) (R != sizeof(S.pr_reg) ? 
144 : 296) -#define SET_PR_FPVALID(S, V, R) \ - do { *(int *) (((void *) &((S)->pr_reg)) + R) = (V); } \ - while (0) - #ifdef CONFIG_X86_X32_ABI #define COMPAT_USE_64BIT_TIME \ (!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)) #endif -/* - * A pointer passed in from user mode. This should not - * be used for syscall parameters, just declare them - * as pointers because the syscall entry code will have - * appropriately converted them already. - */ - -static inline void __user *compat_ptr(compat_uptr_t uptr) -{ - return (void __user *)(unsigned long)uptr; -} - -static inline compat_uptr_t ptr_to_compat(void __user *uptr) -{ - return (u32)(unsigned long)uptr; -} - -static inline void __user *arch_compat_alloc_user_space(long len) -{ - compat_uptr_t sp; - - if (test_thread_flag(TIF_IA32)) { - sp = task_pt_regs(current)->sp; - } else { - /* -128 for the x32 ABI redzone */ - sp = task_pt_regs(current)->sp - 128; - } - - return (void __user *)round_down(sp - len, 16); -} - static inline bool in_x32_syscall(void) { #ifdef CONFIG_X86_X32_ABI @@ -228,10 +100,15 @@ static inline bool in_compat_syscall(void) return in_32bit_syscall(); } #define in_compat_syscall in_compat_syscall /* override the generic impl */ +#define compat_need_64bit_alignment_fixup in_ia32_syscall #endif struct compat_siginfo; -int __copy_siginfo_to_user32(struct compat_siginfo __user *to, - const kernel_siginfo_t *from, bool x32_ABI); + +#ifdef CONFIG_X86_X32_ABI +int copy_siginfo_to_user32(struct compat_siginfo __user *to, + const kernel_siginfo_t *from); +#define copy_siginfo_to_user32 copy_siginfo_to_user32 +#endif /* CONFIG_X86_X32_ABI */ #endif /* _ASM_X86_COMPAT_H */ diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index adc6cc86b062..ad235dda1ded 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -7,37 +7,64 @@ #include <linux/topology.h> #include <linux/nodemask.h> #include <linux/percpu.h> +#include <asm/ibt.h> -#ifdef CONFIG_SMP - -extern void prefill_possible_map(void); - -#else /* CONFIG_SMP */ - -static inline void prefill_possible_map(void) {} - +#ifndef CONFIG_SMP #define cpu_physical_id(cpu) boot_cpu_physical_apicid #define cpu_acpi_id(cpu) 0 -#define safe_smp_processor_id() 0 - #endif /* CONFIG_SMP */ -struct x86_cpu { - struct cpu cpu; -}; - #ifdef CONFIG_HOTPLUG_CPU -extern int arch_register_cpu(int num); -extern void arch_unregister_cpu(int); -extern void start_cpu0(void); -#ifdef CONFIG_DEBUG_HOTPLUG_CPU0 -extern int _debug_hotplug_cpu(int cpu, int action); -#endif +extern void soft_restart_cpu(void); #endif +extern void ap_init_aperfmperf(void); + int mwait_usable(const struct cpuinfo_x86 *); unsigned int x86_family(unsigned int sig); unsigned int x86_model(unsigned int sig); unsigned int x86_stepping(unsigned int sig); +#ifdef CONFIG_X86_BUS_LOCK_DETECT +extern void __init sld_setup(struct cpuinfo_x86 *c); +extern bool handle_user_split_lock(struct pt_regs *regs, long error_code); +extern bool handle_guest_split_lock(unsigned long ip); +extern void handle_bus_lock(struct pt_regs *regs); +void split_lock_init(void); +void bus_lock_init(void); +#else +static inline void __init sld_setup(struct cpuinfo_x86 *c) {} +static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code) +{ + return false; +} + +static inline bool handle_guest_split_lock(unsigned long ip) +{ + return false; +} + +static inline void handle_bus_lock(struct pt_regs *regs) {} +static inline void split_lock_init(void) {} +static inline void bus_lock_init(void) {} +#endif + 
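The split-lock and bus-lock declarations above follow a common header idiom: when the Kconfig option is disabled, empty static inline stubs stand in for the real functions, so call sites compile unchanged with no #ifdef of their own and the optimizer discards the calls entirely. A minimal sketch of the idiom, assuming a made-up FEATURE_FOO symbol rather than any real kernel config option:

/* Sketch of the static-inline-stub idiom (hypothetical FEATURE_FOO,
 * not a real kernel Kconfig symbol). Callers stay free of #ifdefs;
 * with the option off, foo_init() inlines to nothing. */
#include <stdio.h>

#ifdef FEATURE_FOO
void foo_init(void);			/* real implementation elsewhere */
#else
static inline void foo_init(void) { }	/* compiled-out stub */
#endif

int main(void)
{
	foo_init();		/* always legal to call */
	printf("done\n");
	return 0;
}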
+#ifdef CONFIG_IA32_FEAT_CTL +void init_ia32_feat_ctl(struct cpuinfo_x86 *c); +#else +static inline void init_ia32_feat_ctl(struct cpuinfo_x86 *c) {} +#endif + +extern __noendbr void cet_disable(void); + +struct cpu_signature; + +void intel_collect_cpu_info(struct cpu_signature *sig); + +extern u64 x86_read_arch_cap_msr(void); +bool intel_find_matching_signature(void *mc, struct cpu_signature *sig); +int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type); + +extern struct cpumask cpus_stop_mask; + #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index baeba0567126..6be777a06944 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -1,14 +1,207 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _CPU_DEVICE_ID -#define _CPU_DEVICE_ID 1 +#ifndef _ASM_X86_CPU_DEVICE_ID +#define _ASM_X86_CPU_DEVICE_ID + +/* + * Can't use <linux/bitfield.h> because it generates expressions that + * cannot be used in structure initializers. Bitfield construction + * here must match the union in struct cpuinfo_86: + * union { + * struct { + * __u8 x86_model; + * __u8 x86; + * __u8 x86_vendor; + * __u8 x86_reserved; + * }; + * __u32 x86_vfm; + * }; + */ +#define VFM_MODEL_BIT 0 +#define VFM_FAMILY_BIT 8 +#define VFM_VENDOR_BIT 16 +#define VFM_RSVD_BIT 24 + +#define VFM_MODEL_MASK GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT) +#define VFM_FAMILY_MASK GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT) +#define VFM_VENDOR_MASK GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT) + +#define VFM_MODEL(vfm) (((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT) +#define VFM_FAMILY(vfm) (((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT) +#define VFM_VENDOR(vfm) (((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT) + +#define VFM_MAKE(_vendor, _family, _model) ( \ + ((_model) << VFM_MODEL_BIT) | \ + ((_family) << VFM_FAMILY_BIT) | \ + ((_vendor) << VFM_VENDOR_BIT) \ +) /* * Declare drivers belonging to specific x86 CPUs * Similar in spirit to pci_device_id and related PCI functions + * + * The wildcard initializers are in mod_devicetable.h because + * file2alias needs them. Sigh. */ - #include <linux/mod_devicetable.h> +/* Get the INTEL_FAM* model defines */ +#include <asm/intel-family.h> +/* And the X86_VENDOR_* ones */ +#include <asm/processor.h> + +/* Centaur FAM6 models */ +#define X86_CENTAUR_FAM6_C7_A 0xa +#define X86_CENTAUR_FAM6_C7_D 0xd +#define X86_CENTAUR_FAM6_NANO 0xf + +/* x86_cpu_id::flags */ +#define X86_CPU_ID_FLAG_ENTRY_VALID BIT(0) + +/** + * X86_MATCH_CPU - Base macro for CPU matching + * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@_vendor + * @_family: The family number or X86_FAMILY_ANY + * @_model: The model number, model constant or X86_MODEL_ANY + * @_steppings: Bitmask for steppings, stepping constant or X86_STEPPING_ANY + * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY + * @_data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * Use only if you need all selectors. Otherwise use one of the shorter + * macros of the X86_MATCH_* family. If there is no matching shorthand + * macro, consider to add one. If you really need to wrap one of the macros + * into another macro at the usage site for good reasons, then please + * start this local macro with X86_MATCH to allow easy grepping. 
+ */ +#define X86_MATCH_CPU(_vendor, _family, _model, _steppings, _feature, _type, _data) { \ + .vendor = _vendor, \ + .family = _family, \ + .model = _model, \ + .steppings = _steppings, \ + .feature = _feature, \ + .flags = X86_CPU_ID_FLAG_ENTRY_VALID, \ + .type = _type, \ + .driver_data = (unsigned long) _data \ +} + +/** + * X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @family: The family number or X86_FAMILY_ANY + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + */ +#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \ + X86_MATCH_CPU(X86_VENDOR_##vendor, family, X86_MODEL_ANY, \ + X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data) + +/** + * X86_MATCH_VENDOR_FEATURE - Macro for matching vendor and CPU feature + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + */ +#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \ + X86_MATCH_CPU(X86_VENDOR_##vendor, X86_FAMILY_ANY, X86_MODEL_ANY, \ + X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data) + +/** + * X86_MATCH_FEATURE - Macro for matching a CPU feature + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + */ +#define X86_MATCH_FEATURE(feature, data) \ + X86_MATCH_CPU(X86_VENDOR_ANY, X86_FAMILY_ANY, X86_MODEL_ANY, \ + X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data) + +/** + * X86_MATCH_VENDOR_FAM_MODEL - Match vendor, family and model + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @family: The family number or X86_FAMILY_ANY + * @model: The model number, model constant or X86_MODEL_ANY + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + */ +#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \ + X86_MATCH_CPU(X86_VENDOR_##vendor, family, model, X86_STEPPING_ANY, \ + X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data) + +/** + * X86_MATCH_VENDOR_FAM - Match vendor and family + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @family: The family number or X86_FAMILY_ANY + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + */ +#define X86_MATCH_VENDOR_FAM(vendor, family, data) \ + X86_MATCH_CPU(X86_VENDOR_##vendor, family, X86_MODEL_ANY, \ + X86_STEPPING_ANY, X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data) + +/** + * X86_MATCH_VFM - Match encoded vendor/family/model + * @vfm: Encoded 8-bits each for vendor, family, model + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is cast to unsigned long internally. 
+ */ +#define X86_MATCH_VFM(vfm, data) \ + X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \ + X86_STEPPING_ANY, X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data) + +#define __X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins) +/** + * X86_MATCH_VFM_STEPS - Match encoded vendor/family/model and steppings + * range. + * @vfm: Encoded 8-bits each for vendor, family, model + * @min_step: Lowest stepping number to match + * @max_step: Highest stepping number to match + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is cast to unsigned long internally. + */ +#define X86_MATCH_VFM_STEPS(vfm, min_step, max_step, data) \ + X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \ + __X86_STEPPINGS(min_step, max_step), X86_FEATURE_ANY, \ + X86_CPU_TYPE_ANY, data) + +/** + * X86_MATCH_VFM_FEATURE - Match encoded vendor/family/model/feature + * @vfm: Encoded 8-bits each for vendor, family, model + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is cast to unsigned long internally. + */ +#define X86_MATCH_VFM_FEATURE(vfm, feature, data) \ + X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \ + X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data) + +/** + * X86_MATCH_VFM_CPU_TYPE - Match encoded vendor/family/model/type + * @vfm: Encoded 8-bits each for vendor, family, model + * @type: CPU type e.g. P-core, E-core + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is cast to unsigned long internally. + */ +#define X86_MATCH_VFM_CPU_TYPE(vfm, type, data) \ + X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \ + X86_STEPPING_ANY, X86_FEATURE_ANY, type, data) extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match); +extern bool x86_match_min_microcode_rev(const struct x86_cpu_id *table); -#endif +#endif /* _ASM_X86_CPU_DEVICE_ID */ diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 29c706415443..462fc34f1317 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_CPU_ENTRY_AREA_H #define _ASM_X86_CPU_ENTRY_AREA_H @@ -6,6 +6,78 @@ #include <linux/percpu-defs.h> #include <asm/processor.h> #include <asm/intel_ds.h> +#include <asm/pgtable_areas.h> + +#ifdef CONFIG_X86_64 + +#ifdef CONFIG_AMD_MEM_ENCRYPT +#define VC_EXCEPTION_STKSZ EXCEPTION_STKSZ +#else +#define VC_EXCEPTION_STKSZ 0 +#endif + +/* Macro to enforce the same ordering and stack sizes */ +#define ESTACKS_MEMBERS(guardsize, optional_stack_size) \ + char DF_stack_guard[guardsize]; \ + char DF_stack[EXCEPTION_STKSZ]; \ + char NMI_stack_guard[guardsize]; \ + char NMI_stack[EXCEPTION_STKSZ]; \ + char DB_stack_guard[guardsize]; \ + char DB_stack[EXCEPTION_STKSZ]; \ + char MCE_stack_guard[guardsize]; \ + char MCE_stack[EXCEPTION_STKSZ]; \ + char VC_stack_guard[guardsize]; \ + char VC_stack[optional_stack_size]; \ + char VC2_stack_guard[guardsize]; \ + char VC2_stack[optional_stack_size]; \ + char IST_top_guard[guardsize]; \ + +/* The exception stacks' physical storage. No guard pages required */ +struct exception_stacks { + ESTACKS_MEMBERS(0, VC_EXCEPTION_STKSZ) +}; + +/* The effective cpu entry area mapping with guard pages. 
*/ +struct cea_exception_stacks { + ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ) +}; + +/* + * The exception stack ordering in [cea_]exception_stacks + */ +enum exception_stack_ordering { + ESTACK_DF, + ESTACK_NMI, + ESTACK_DB, + ESTACK_MCE, + ESTACK_VC, + ESTACK_VC2, + N_EXCEPTION_STACKS +}; + +#define CEA_ESTACK_SIZE(st) \ + sizeof(((struct cea_exception_stacks *)0)->st## _stack) + +#define CEA_ESTACK_BOT(ceastp, st) \ + ((unsigned long)&(ceastp)->st## _stack) + +#define CEA_ESTACK_TOP(ceastp, st) \ + (CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st)) + +#define CEA_ESTACK_OFFS(st) \ + offsetof(struct cea_exception_stacks, st## _stack) + +#define CEA_ESTACK_PAGES \ + (sizeof(struct cea_exception_stacks) / PAGE_SIZE) + +#endif + +#ifdef CONFIG_X86_32 +struct doublefault_stack { + unsigned long stack[(PAGE_SIZE - sizeof(struct x86_hw_tss)) / sizeof(unsigned long)]; + struct x86_hw_tss tss; +} __aligned(PAGE_SIZE); +#endif /* * cpu_entry_area is a percpu region that contains things needed by the CPU @@ -20,10 +92,19 @@ struct cpu_entry_area { /* * The GDT is just below entry_stack and thus serves (on x86_64) as - * a a read-only guard page. + * a read-only guard page. On 32-bit the GDT must be writeable, so + * it needs an extra guard page. */ +#ifdef CONFIG_X86_32 + char guard_entry_stack[PAGE_SIZE]; +#endif struct entry_stack_page entry_stack_page; +#ifdef CONFIG_X86_32 + char guard_doublefault_stack[PAGE_SIZE]; + struct doublefault_stack doublefault_stack; +#endif + /* * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because * we need task switches to work, and task switches write to the TSS. @@ -32,14 +113,10 @@ struct cpu_entry_area { #ifdef CONFIG_X86_64 /* - * Exception stacks used for IST entries. - * - * In the future, this should have a separate slot for each stack - * with guard pages between them. + * Exception stacks used for IST entries with guard pages. */ - char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; + struct cea_exception_stacks estacks; #endif -#ifdef CONFIG_CPU_SUP_INTEL /* * Per CPU debug store for Intel performance monitoring. Wastes a * full page at the moment. @@ -50,30 +127,27 @@ struct cpu_entry_area { * Reserve enough fixmap PTEs. 
*/ struct debug_store_buffers cpu_debug_buffers; -#endif }; -#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) -#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) +#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); +DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); extern void setup_cpu_entry_areas(void); extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); -#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE -#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) - -#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) - -#define CPU_ENTRY_AREA_MAP_SIZE \ - (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE) - extern struct cpu_entry_area *get_cpu_entry_area(int cpu); -static inline struct entry_stack *cpu_entry_stack(int cpu) +static __always_inline struct entry_stack *cpu_entry_stack(int cpu) { return &get_cpu_entry_area(cpu)->entry_stack_page.stack; } +#define __this_cpu_ist_top_va(name) \ + CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name) + +#define __this_cpu_ist_bottom_va(name) \ + CEA_ESTACK_BOT(__this_cpu_read(cea_exception_stacks), name) + #endif diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index ce95b8cbd229..3ddc1d33399b 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -4,10 +4,12 @@ #include <asm/processor.h> -#if defined(__KERNEL__) && !defined(__ASSEMBLY__) +#if defined(__KERNEL__) && !defined(__ASSEMBLER__) #include <asm/asm.h> #include <linux/bitops.h> +#include <asm/alternative.h> +#include <asm/cpufeaturemasks.h> enum cpuid_leafs { @@ -22,105 +24,48 @@ enum cpuid_leafs CPUID_LNX_3, CPUID_7_0_EBX, CPUID_D_1_EAX, - CPUID_F_0_EDX, - CPUID_F_1_EDX, + CPUID_LNX_4, + CPUID_7_1_EAX, CPUID_8000_0008_EBX, CPUID_6_EAX, CPUID_8000_000A_EDX, CPUID_7_ECX, - CPUID_8000_0007_EBX, + CPUID_LNX_6, CPUID_7_EDX, + CPUID_8000_001F_EAX, + CPUID_8000_0021_EAX, + CPUID_LNX_5, + NR_CPUID_WORDS, }; -#ifdef CONFIG_X86_FEATURE_NAMES extern const char * const x86_cap_flags[NCAPINTS*32]; extern const char * const x86_power_flags[32]; -#define X86_CAP_FMT "%s" -#define x86_cap_flag(flag) x86_cap_flags[flag] -#else -#define X86_CAP_FMT "%d:%d" -#define x86_cap_flag(flag) ((flag) >> 5), ((flag) & 31) -#endif /* * In order to save room, we index into this array by doing * X86_BUG_<name> - NCAPINTS*32. */ extern const char * const x86_bug_flags[NBUGINTS*32]; +#define x86_bug_flag(flag) x86_bug_flags[flag] #define test_cpu_cap(c, bit) \ - test_bit(bit, (unsigned long *)((c)->x86_capability)) - -/* - * There are 32 bits/features in each mask word. The high bits - * (selected with (bit>>5) give us the word number and the low 5 - * bits give us the bit/feature number inside the word. - * (1UL<<((bit)&31) gives us a mask for the feature_bit so we can - * see if it is set in the mask word. 
- */ -#define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \ - (((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word )) - -#define REQUIRED_MASK_BIT_SET(feature_bit) \ - ( CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 0, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 1, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 2, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 3, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 4, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 5, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 6, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 7, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 8, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 9, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 10, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 11, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 12, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 13, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 14, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ - REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 19)) - -#define DISABLED_MASK_BIT_SET(feature_bit) \ - ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 1, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 2, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 3, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 4, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 5, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 6, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 7, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 8, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 9, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 10, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 11, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 12, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 13, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 14, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ - DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 19)) + arch_test_bit(bit, (unsigned long *)((c)->x86_capability)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ test_cpu_cap(c, bit)) #define this_cpu_has(bit) \ - (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ - x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability)) + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ + x86_this_cpu_test_bit(bit, cpu_info.x86_capability)) /* - * This macro is for detection of features which need kernel - * infrastructure to be used. It may *not* directly test the CPU - * itself. Use the cpu_has() family if you want true runtime - * testing of CPU features, like in hypervisor code where you are - * supporting a possible guest feature where host support for it + * This is the default CPU features testing macro to use in code. 
+ * + * It is for detection of features which need kernel infrastructure to be + * used. It may *not* directly test the CPU itself. Use the cpu_has() family + * if you want true runtime testing of CPU features, like in hypervisor code + * where you are supporting a possible guest feature where host support for it * is not relevant. */ #define cpu_feature_enabled(bit) \ @@ -132,70 +77,38 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; extern void setup_clear_cpu_cap(unsigned int bit); extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); - -#define setup_force_cpu_cap(bit) do { \ - set_cpu_cap(&boot_cpu_data, bit); \ +void check_cpufeature_deps(struct cpuinfo_x86 *c); + +#define setup_force_cpu_cap(bit) do { \ + \ + if (!boot_cpu_has(bit)) \ + WARN_ON(alternatives_patched); \ + \ + set_cpu_cap(&boot_cpu_data, bit); \ set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) -#if defined(__clang__) && !defined(CONFIG_CC_HAS_ASM_GOTO) - -/* - * Workaround for the sake of BPF compilation which utilizes kernel - * headers, but clang does not support ASM GOTO and fails the build. - */ -#ifndef __BPF_TRACING__ -#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments" -#endif - -#define static_cpu_has(bit) boot_cpu_has(bit) - -#else - /* - * Static testing of CPU features. Used the same as boot_cpu_has(). - * These will statically patch the target code for additional - * performance. + * Do not use an "m" constraint for [cap_byte] here: gcc doesn't know + * that this is only used on a fallback path and will sometimes cause + * it to manifest the address of boot_cpu_data in a register, fouling + * the mainline (post-initialization) code. */ -static __always_inline __pure bool _static_cpu_has(u16 bit) +static __always_inline bool _static_cpu_has(u16 bit) { - asm_volatile_goto("1: jmp 6f\n" - "2:\n" - ".skip -(((5f-4f) - (2b-1b)) > 0) * " - "((5f-4f) - (2b-1b)),0x90\n" - "3:\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" /* src offset */ - " .long 4f - .\n" /* repl offset */ - " .word %P[always]\n" /* always replace */ - " .byte 3b - 1b\n" /* src len */ - " .byte 5f - 4f\n" /* repl len */ - " .byte 3b - 2b\n" /* pad len */ - ".previous\n" - ".section .altinstr_replacement,\"ax\"\n" - "4: jmp %l[t_no]\n" - "5:\n" - ".previous\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" /* src offset */ - " .long 0\n" /* no replacement */ - " .word %P[feature]\n" /* feature bit */ - " .byte 3b - 1b\n" /* src len */ - " .byte 0\n" /* repl len */ - " .byte 0\n" /* pad len */ - ".previous\n" - ".section .altinstr_aux,\"ax\"\n" - "6:\n" - " testb %[bitnum],%[cap_byte]\n" - " jnz %l[t_yes]\n" - " jmp %l[t_no]\n" - ".previous\n" + asm goto(ALTERNATIVE_TERNARY("jmp 6f", %c[feature], "", "jmp %l[t_no]") + ".pushsection .altinstr_aux,\"ax\"\n" + "6:\n" + ANNOTATE_DATA_SPECIAL "\n" + " testb %[bitnum], %a[cap_byte]\n" + " jnz %l[t_yes]\n" + " jmp %l[t_no]\n" + ".popsection\n" : : [feature] "i" (bit), - [always] "i" (X86_FEATURE_ALWAYS), [bitnum] "i" (1 << (bit & 7)), - [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3]) + [cap_byte] "i" (&((const char *)boot_cpu_data.x86_capability)[bit >> 3]) : : t_yes, t_no); t_yes: return true; @@ -209,7 +122,6 @@ t_no: boot_cpu_has(bit) : \ _static_cpu_has(bit) \ ) -#endif #define cpu_has_bug(c, bit) cpu_has(c, (bit)) #define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) @@ -226,5 +138,5 @@ t_no: #define CPU_FEATURE_TYPEVAL 
boot_cpu_data.x86_vendor, boot_cpu_data.x86, \ boot_cpu_data.x86_model -#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ +#endif /* defined(__KERNEL__) && !defined(__ASSEMBLER__) */ #endif /* _ASM_X86_CPUFEATURE_H */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 6d6122524711..c3b53beb1300 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -2,187 +2,178 @@ #ifndef _ASM_X86_CPUFEATURES_H #define _ASM_X86_CPUFEATURES_H -#ifndef _ASM_X86_REQUIRED_FEATURES_H -#include <asm/required-features.h> -#endif - -#ifndef _ASM_X86_DISABLED_FEATURES_H -#include <asm/disabled-features.h> -#endif - /* * Defines x86 CPU feature bits */ -#define NCAPINTS 19 /* N 32-bit words worth of info */ -#define NBUGINTS 1 /* N 32-bit bug flags */ +#define NCAPINTS 22 /* N 32-bit words worth of info */ +#define NBUGINTS 2 /* N 32-bit bug flags */ /* * Note: If the comment begins with a quoted string, that string is used - * in /proc/cpuinfo instead of the macro name. If the string is "", - * this feature bit is not displayed in /proc/cpuinfo at all. + * in /proc/cpuinfo instead of the macro name. Otherwise, this feature + * bit is not displayed in /proc/cpuinfo at all. * * When adding new features here that depend on other features, * please update the table in kernel/cpu/cpuid-deps.c as well. */ /* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */ -#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ -#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ -#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ -#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ -#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ -#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ -#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ -#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ -#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ -#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ -#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ -#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ -#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ -#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ -#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ -#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ +#define X86_FEATURE_FPU ( 0*32+ 0) /* "fpu" Onboard FPU */ +#define X86_FEATURE_VME ( 0*32+ 1) /* "vme" Virtual Mode Extensions */ +#define X86_FEATURE_DE ( 0*32+ 2) /* "de" Debugging Extensions */ +#define X86_FEATURE_PSE ( 0*32+ 3) /* "pse" Page Size Extensions */ +#define X86_FEATURE_TSC ( 0*32+ 4) /* "tsc" Time Stamp Counter */ +#define X86_FEATURE_MSR ( 0*32+ 5) /* "msr" Model-Specific Registers */ +#define X86_FEATURE_PAE ( 0*32+ 6) /* "pae" Physical Address Extensions */ +#define X86_FEATURE_MCE ( 0*32+ 7) /* "mce" Machine Check Exception */ +#define X86_FEATURE_CX8 ( 0*32+ 8) /* "cx8" CMPXCHG8 instruction */ +#define X86_FEATURE_APIC ( 0*32+ 9) /* "apic" Onboard APIC */ +#define X86_FEATURE_SEP ( 0*32+11) /* "sep" SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR ( 0*32+12) /* "mtrr" Memory Type Range Registers */ +#define X86_FEATURE_PGE ( 0*32+13) /* 
"pge" Page Global Enable */ +#define X86_FEATURE_MCA ( 0*32+14) /* "mca" Machine Check Architecture */ +#define X86_FEATURE_CMOV ( 0*32+15) /* "cmov" CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ +#define X86_FEATURE_PAT ( 0*32+16) /* "pat" Page Attribute Table */ +#define X86_FEATURE_PSE36 ( 0*32+17) /* "pse36" 36-bit PSEs */ +#define X86_FEATURE_PN ( 0*32+18) /* "pn" Processor serial number */ +#define X86_FEATURE_CLFLUSH ( 0*32+19) /* "clflush" CLFLUSH instruction */ #define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ -#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ -#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_ACPI ( 0*32+22) /* "acpi" ACPI via MSR */ +#define X86_FEATURE_MMX ( 0*32+23) /* "mmx" Multimedia Extensions */ +#define X86_FEATURE_FXSR ( 0*32+24) /* "fxsr" FXSAVE/FXRSTOR, CR4.OSFXSR */ #define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ #define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ #define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ -#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_HT ( 0*32+28) /* "ht" Hyper-Threading */ #define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ -#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ -#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ +#define X86_FEATURE_IA64 ( 0*32+30) /* "ia64" IA-64 processor */ +#define X86_FEATURE_PBE ( 0*32+31) /* "pbe" Pending Break Enable */ /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ /* Don't duplicate feature flags which are redundant with Intel! */ -#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ -#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */ -#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ -#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ -#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ +#define X86_FEATURE_SYSCALL ( 1*32+11) /* "syscall" SYSCALL/SYSRET */ +#define X86_FEATURE_MP ( 1*32+19) /* "mp" MP Capable */ +#define X86_FEATURE_NX ( 1*32+20) /* "nx" Execute Disable */ +#define X86_FEATURE_MMXEXT ( 1*32+22) /* "mmxext" AMD MMX extensions */ +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* "fxsr_opt" FXSAVE/FXRSTOR optimizations */ #define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ -#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ -#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */ -#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */ -#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */ +#define X86_FEATURE_RDTSCP ( 1*32+27) /* "rdtscp" RDTSCP */ +#define X86_FEATURE_LM ( 1*32+29) /* "lm" Long Mode (x86-64, 64-bit support) */ +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* "3dnowext" AMD 3DNow extensions */ +#define X86_FEATURE_3DNOW ( 1*32+31) /* "3dnow" 3DNow */ /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ -#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ -#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ -#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* "recovery" CPU in recovery mode */ +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* "longrun" Longrun power control */ +#define X86_FEATURE_LRTI ( 2*32+ 3) /* "lrti" LongRun table interface */ /* Other features, Linux-defined mapping, word 3 */ /* This range is used for feature bits which conflict or are 
synthesized */ -#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ -#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ -#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ -#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ - -/* CPU types for specific tunings: */ -#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ -#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ -#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ -#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ -#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */ -#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */ -#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ -#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ -#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" MFENCE synchronizes RDTSC */ -#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ -#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ -#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ -#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ -#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */ -#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ -#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ -#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ -#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ -#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ -#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ -#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* "cxmmx" Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* "k6_mtrr" AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* "cyrix_arr" Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */ +#define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */ +#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */ +#define X86_FEATURE_ZEN6 ( 3*32+ 6) /* CPU based on Zen6 microarchitecture */ +/* Free ( 3*32+ 7) */ +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */ +#define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */ +#define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */ +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* "arch_perfmon" Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS ( 3*32+12) /* "pebs" Precise-Event Based Sampling */ +#define X86_FEATURE_BTS ( 3*32+13) /* "bts" Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* syscall in IA32 userspace */ +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* sysenter in IA32 userspace */ +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* 
"rep_good" REP microcode works well */ +#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* "amd_lbr_v2" AMD Last Branch Record Extension Version 2 */ +#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* Clear CPU buffers using VERW */ +#define X86_FEATURE_ACC_POWER ( 3*32+19) /* "acc_power" AMD Accumulated Power Mechanism */ +#define X86_FEATURE_NOPL ( 3*32+20) /* "nopl" The NOPL (0F 1F) instructions */ +#define X86_FEATURE_ALWAYS ( 3*32+21) /* Always-present feature */ +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* "xtopology" CPU topology enum extensions */ +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* "tsc_reliable" TSC is known to be reliable */ +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* "nonstop_tsc" TSC does not stop in C states */ +#define X86_FEATURE_CPUID ( 3*32+25) /* "cpuid" CPU has CPUID instruction itself */ +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* "extd_apicid" Extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* "amd_dcm" AMD multi-node processor */ +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* "aperfmperf" P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ +#define X86_FEATURE_RAPL ( 3*32+29) /* "rapl" AMD/Hygon RAPL interface */ +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* "nonstop_tsc_s3" TSC doesn't stop in S3 state */ +#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* "tsc_known_freq" TSC has known frequency */ /* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */ #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ -#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ -#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* "pclmulqdq" PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* "dtes64" 64-bit Debug Store */ #define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */ #define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */ -#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ -#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */ -#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ -#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ -#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ -#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ -#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ -#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ -#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */ -#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ -#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */ -#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ -#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_VMX ( 4*32+ 5) /* "vmx" Hardware virtualization */ +#define X86_FEATURE_SMX ( 4*32+ 6) /* "smx" Safer Mode eXtensions */ +#define X86_FEATURE_EST ( 4*32+ 7) /* "est" Enhanced SpeedStep */ +#define X86_FEATURE_TM2 ( 4*32+ 8) /* "tm2" Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* "ssse3" Supplemental SSE-3 */ +#define X86_FEATURE_CID ( 4*32+10) /* "cid" Context ID */ +#define X86_FEATURE_SDBG ( 4*32+11) /* "sdbg" Silicon Debug */ +#define X86_FEATURE_FMA ( 4*32+12) /* "fma" Fused multiply-add */ +#define X86_FEATURE_CX16 ( 4*32+13) /* "cx16" CMPXCHG16B instruction */ +#define X86_FEATURE_XTPR ( 4*32+14) /* "xtpr" Send Task Priority Messages */ +#define X86_FEATURE_PDCM ( 4*32+15) /* "pdcm" Perf/Debug 
Capabilities MSR */ +#define X86_FEATURE_PCID ( 4*32+17) /* "pcid" Process Context Identifiers */ +#define X86_FEATURE_DCA ( 4*32+18) /* "dca" Direct Cache Access */ #define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ #define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ -#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */ -#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ -#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ -#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */ -#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ -#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */ -#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */ -#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ -#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */ -#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */ -#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ +#define X86_FEATURE_X2APIC ( 4*32+21) /* "x2apic" X2APIC */ +#define X86_FEATURE_MOVBE ( 4*32+22) /* "movbe" MOVBE instruction */ +#define X86_FEATURE_POPCNT ( 4*32+23) /* "popcnt" POPCNT instruction */ +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* "tsc_deadline_timer" TSC deadline timer */ +#define X86_FEATURE_AES ( 4*32+25) /* "aes" AES instructions */ +#define X86_FEATURE_XSAVE ( 4*32+26) /* "xsave" XSAVE/XRSTOR/XSETBV/XGETBV instructions */ +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* XSAVE instruction enabled in the OS */ +#define X86_FEATURE_AVX ( 4*32+28) /* "avx" Advanced Vector Extensions */ +#define X86_FEATURE_F16C ( 4*32+29) /* "f16c" 16-bit FP conversions */ +#define X86_FEATURE_RDRAND ( 4*32+30) /* "rdrand" RDRAND instruction */ +#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* "hypervisor" Running on a hypervisor */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ #define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ #define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ #define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ #define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ -#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ -#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ -#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ -#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ -#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ -#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* "ace2" Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* "ace2_en" ACE v2 enabled */ +#define X86_FEATURE_PHE ( 5*32+10) /* "phe" PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN ( 5*32+11) /* "phe_en" PHE enabled */ +#define X86_FEATURE_PMM ( 5*32+12) /* "pmm" PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN ( 5*32+13) /* "pmm_en" PMM enabled */ /* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */ -#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ -#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ -#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */ -#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ -#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ -#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ 
-#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ -#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ -#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ -#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ -#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ -#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ -#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ -#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ -#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ -#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ -#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */ -#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ -#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */ -#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */ -#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */ -#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ -#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */ -#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */ -#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ -#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */ +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* "lahf_lm" LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* "cmp_legacy" If yes HyperThreading not valid */ +#define X86_FEATURE_SVM ( 6*32+ 2) /* "svm" Secure Virtual Machine */ +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* "extapic" Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* "cr8_legacy" CR8 in 32-bit mode */ +#define X86_FEATURE_ABM ( 6*32+ 5) /* "abm" Advanced bit manipulation */ +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* "sse4a" SSE-4A */ +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* "misalignsse" Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* "3dnowprefetch" 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW ( 6*32+ 9) /* "osvw" OS Visible Workaround */ +#define X86_FEATURE_IBS ( 6*32+10) /* "ibs" Instruction Based Sampling */ +#define X86_FEATURE_XOP ( 6*32+11) /* "xop" Extended AVX instructions */ +#define X86_FEATURE_SKINIT ( 6*32+12) /* "skinit" SKINIT/STGI instructions */ +#define X86_FEATURE_WDT ( 6*32+13) /* "wdt" Watchdog timer */ +#define X86_FEATURE_LWP ( 6*32+15) /* "lwp" Light Weight Profiling */ +#define X86_FEATURE_FMA4 ( 6*32+16) /* "fma4" 4 operands MAC instructions */ +#define X86_FEATURE_TCE ( 6*32+17) /* "tce" Translation Cache Extension */ +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* "nodeid_msr" NodeId MSR */ +#define X86_FEATURE_TBM ( 6*32+21) /* "tbm" Trailing Bit Manipulations */ +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* "topoext" Topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* "perfctr_core" Core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* "perfctr_nb" NB performance counter extensions */ +#define X86_FEATURE_BPEXT ( 6*32+26) /* "bpext" Data breakpoint extension */ +#define X86_FEATURE_PTSC ( 6*32+27) /* "ptsc" Performance time-stamp counter */ +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* "perfctr_llc" Last Level Cache performance counter extensions */ +#define X86_FEATURE_MWAITX ( 6*32+29) /* "mwaitx" MWAIT extension (MONITORX/MWAITX instructions) */ 
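Each X86_FEATURE_* constant above packs a (word, bit) pair into one integer: word*32 selects one of the kernel's 32-bit capability words, and the remainder selects a bit within it. A minimal user-space sketch of how such a constant is decoded and tested, assuming a plain uint32_t capability array (the kernel's real accessors are cpu_feature_enabled() and boot_cpu_has(); the array and helper names here are illustrative only, and the NCAPINTS value is an assumption):

#include <stdint.h>
#include <stdio.h>

#define NCAPINTS 22	/* assumption: 22 capability words (0..21) */

/* Hypothetical capability store, one 32-bit word per feature word. */
static uint32_t x86_capability[NCAPINTS];

/* Test a feature constant encoded as (word*32 + bit). */
static int has_feature(unsigned int feature)
{
	return (x86_capability[feature / 32] >> (feature % 32)) & 1;
}

int main(void)
{
	unsigned int mwaitx = 6*32 + 29;	/* X86_FEATURE_MWAITX above */

	x86_capability[6] |= 1u << 29;		/* as if CPUID 0x80000001:ECX had set it */
	printf("mwaitx: %d\n", has_feature(mwaitx));
	return 0;
}

The X86_BUG(x) encoding further below continues the same numbering: bug x lives at bit NCAPINTS*32 + x, just past the last capability word.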
/* * Auxiliary flags: Linux defined - For features scattered in various @@ -190,196 +181,393 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */ -#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ -#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ -#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ -#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ -#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ -#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ -#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ -#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ -#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ -#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ -#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ -#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ -#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ -#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ -#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ -#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ -#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. 
*/ -#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */ -#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ -#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ -#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* "ring3mwait" Ring 3 MONITOR/MWAIT instructions */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* "cpuid_fault" Intel CPUID faulting */ +#define X86_FEATURE_CPB ( 7*32+ 2) /* "cpb" AMD Core Performance Boost */ +#define X86_FEATURE_EPB ( 7*32+ 3) /* "epb" IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* "cat_l3" Cache Allocation Technology L3 */ +#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* "cat_l2" Cache Allocation Technology L2 */ +#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* "cdp_l3" Code and Data Prioritization L3 */ +#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* "tdx_host_platform" Platform supports being a TDX host */ +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* "hw_pstate" AMD HW-PState */ +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* "proc_feedback" AMD ProcFeedbackInterface */ +#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* Use compacted XSTATE (XSAVES or XSAVEC) */ +#define X86_FEATURE_PTI ( 7*32+11) /* "pti" Kernel Page Table Isolation enabled */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* Set/clear IBRS on kernel entry/exit */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* Fill RSB on VM-Exit */ +#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* "intel_ppin" Intel Processor Inventory Number */ +#define X86_FEATURE_CDP_L2 ( 7*32+15) /* "cdp_l2" Code and Data Prioritization L2 */ +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* MSR SPEC_CTRL is implemented */ +#define X86_FEATURE_SSBD ( 7*32+17) /* "ssbd" Speculative Store Bypass Disable */ +#define X86_FEATURE_MBA ( 7*32+18) /* "mba" Memory Bandwidth Allocation */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ +#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* "perfmon_v2" AMD Performance Monitoring Version 2 */ +#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. 
*/ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */ +#define X86_FEATURE_IBRS ( 7*32+25) /* "ibrs" Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier without a guaranteed RSB flush */ +#define X86_FEATURE_STIBP ( 7*32+27) /* "stibp" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN ( 7*32+28) /* Generic flag for all Zen and newer */ +#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* L1TF workaround PTE inversion */ +#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* "ibrs_enhanced" Enhanced IBRS */ +#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* MSR IA32_FEAT_CTL configured */ /* Virtualization flags: Linux defined, word 8 */ -#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ -#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ -#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ -#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* "tpr_shadow" Intel TPR Shadow */ +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */ +#define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */ +#define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */ +#define X86_FEATURE_COHERENCY_SFW_NO ( 8*32+ 4) /* SNP cache coherency software work around not needed */ -#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ -#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ -#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ +#define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */ +#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */ +#define X86_FEATURE_EPT_AD ( 8*32+17) /* "ept_ad" Intel Extended Page Table access-dirty bit */ +#define X86_FEATURE_VMCALL ( 8*32+18) /* Hypervisor supports the VMCALL instruction */ +#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* PV unlock function */ +#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* PV vcpu_is_preempted function */ +#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* "tdx_guest" Intel Trust Domain Extensions Guest */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ -#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ -#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */ -#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ -#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ -#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ -#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ -#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ -#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ -#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ -#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ -#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ -#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ -#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ -#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ -#define 
X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ -#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */ -#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */ -#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ -#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ -#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ -#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ -#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ -#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ -#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ -#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ -#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ -#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ -#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* "fsgsbase" RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* "tsc_adjust" TSC adjustment MSR 0x3B */ +#define X86_FEATURE_SGX ( 9*32+ 2) /* "sgx" Software Guard Extensions */ +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* "bmi1" 1st group bit manipulation extensions */ +#define X86_FEATURE_HLE ( 9*32+ 4) /* "hle" Hardware Lock Elision */ +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* "avx2" AVX2 instructions */ +#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* FPU data pointer updated only on x87 exceptions */ +#define X86_FEATURE_SMEP ( 9*32+ 7) /* "smep" Supervisor Mode Execution Protection */ +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* "bmi2" 2nd group bit manipulation extensions */ +#define X86_FEATURE_ERMS ( 9*32+ 9) /* "erms" Enhanced REP MOVSB/STOSB instructions */ +#define X86_FEATURE_INVPCID ( 9*32+10) /* "invpcid" Invalidate Processor Context ID */ +#define X86_FEATURE_RTM ( 9*32+11) /* "rtm" Restricted Transactional Memory */ +#define X86_FEATURE_CQM ( 9*32+12) /* "cqm" Cache QoS Monitoring */ +#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* Zero out FPU CS and FPU DS */ +#define X86_FEATURE_MPX ( 9*32+14) /* "mpx" Memory Protection Extension */ +#define X86_FEATURE_RDT_A ( 9*32+15) /* "rdt_a" Resource Director Technology Allocation */ +#define X86_FEATURE_AVX512F ( 9*32+16) /* "avx512f" AVX-512 Foundation */ +#define X86_FEATURE_AVX512DQ ( 9*32+17) /* "avx512dq" AVX-512 DQ (Double/Quad granular) Instructions */ +#define X86_FEATURE_RDSEED ( 9*32+18) /* "rdseed" RDSEED instruction */ +#define X86_FEATURE_ADX ( 9*32+19) /* "adx" ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP ( 9*32+20) /* "smap" Supervisor Mode Access Prevention */ +#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* "avx512ifma" AVX-512 Integer Fused Multiply-Add instructions */ +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* "clflushopt" CLFLUSHOPT instruction */ +#define X86_FEATURE_CLWB ( 9*32+24) /* "clwb" CLWB instruction */ +#define X86_FEATURE_INTEL_PT ( 9*32+25) /* "intel_pt" Intel Processor Trace */ +#define X86_FEATURE_AVX512PF ( 9*32+26) /* "avx512pf" AVX-512 Prefetch */ +#define X86_FEATURE_AVX512ER ( 9*32+27) /* "avx512er" AVX-512 Exponential and Reciprocal */ +#define X86_FEATURE_AVX512CD ( 9*32+28) /* "avx512cd" AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* "sha_ni" SHA1/SHA256 Instruction Extensions */ +#define X86_FEATURE_AVX512BW ( 
9*32+30) /* "avx512bw" AVX-512 BW (Byte/Word granular) Instructions */ +#define X86_FEATURE_AVX512VL ( 9*32+31) /* "avx512vl" AVX-512 VL (128/256 Vector Length) Extensions */ /* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */ -#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */ -#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ -#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ -#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* "xsaveopt" XSAVEOPT instruction */ +#define X86_FEATURE_XSAVEC (10*32+ 1) /* "xsavec" XSAVEC instruction */ +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* "xgetbv1" XGETBV with ECX = 1 instruction */ +#define X86_FEATURE_XSAVES (10*32+ 3) /* "xsaves" XSAVES/XRSTORS instructions */ +#define X86_FEATURE_XFD (10*32+ 4) /* eXtended Feature Disabling */ -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */ -#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ +/* + * Extended auxiliary flags: Linux defined - for features scattered in various + * CPUID levels like 0xf, etc. + * + * Reuse free bits when adding new feature flags! + */ +#define X86_FEATURE_CQM_LLC (11*32+ 0) /* "cqm_llc" LLC QoS if 1 */ +#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* "cqm_occup_llc" LLC occupancy monitoring */ +#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* "cqm_mbm_total" LLC Total MBM monitoring */ +#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* "cqm_mbm_local" LLC Local MBM monitoring */ +#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* LFENCE in user entry SWAPGS path */ +#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* LFENCE in kernel entry SWAPGS path */ +#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* "split_lock_detect" #AC for split lock */ +#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* Per-thread Memory Bandwidth Allocation */ +#define X86_FEATURE_SGX1 (11*32+ 8) /* Basic SGX */ +#define X86_FEATURE_SGX2 (11*32+ 9) /* SGX Enclave Dynamic Memory Management (EDMM) */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* Issue an IBPB on kernel entry */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* RET prediction control */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* AMD BTB untrain return */ +#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* Use IBPB during runtime firmware calls */ +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* Fill RSB on VM exit when EIBRS is enabled */ +#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* SGX EDECCSSA user leaf function */ +#define X86_FEATURE_CALL_DEPTH (11*32+19) /* Call depth tracking for RSB stuffing */ +#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* MSR IA32_TSX_CTRL (Intel) implemented */ +#define X86_FEATURE_SMBA (11*32+21) /* Slow Memory Bandwidth Allocation */ +#define X86_FEATURE_BMEC (11*32+22) /* Bandwidth Monitoring Event Configuration */ +#define X86_FEATURE_USER_SHSTK (11*32+23) /* "user_shstk" Shadow stack support for user mode applications */ +#define X86_FEATURE_SRSO (11*32+24) /* AMD BTB untrain RETs */ +#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* AMD BTB untrain RETs through aliasing */ +#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* Issue an IBPB only on VMEXIT */ +#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) 
/* IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ +#define X86_FEATURE_ZEN2 (11*32+28) /* CPU based on Zen2 microarchitecture */ +#define X86_FEATURE_ZEN3 (11*32+29) /* CPU based on Zen3 microarchitecture */ +#define X86_FEATURE_ZEN4 (11*32+30) /* CPU based on Zen4 microarchitecture */ +#define X86_FEATURE_ZEN1 (11*32+31) /* CPU based on Zen1 microarchitecture */ -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */ -#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */ -#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ -#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ +/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ +#define X86_FEATURE_SHA512 (12*32+ 0) /* SHA512 instructions */ +#define X86_FEATURE_SM3 (12*32+ 1) /* SM3 instructions */ +#define X86_FEATURE_SM4 (12*32+ 2) /* SM4 instructions */ +#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */ +#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_LASS (12*32+ 6) /* "lass" Linear Address Space Separation */ +#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */ +#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* Intel Architectural PerfMon Extension */ +#define X86_FEATURE_FZRM (12*32+10) /* Fast zero-length REP MOVSB */ +#define X86_FEATURE_FSRS (12*32+11) /* Fast short REP STOSB */ +#define X86_FEATURE_FSRC (12*32+12) /* Fast short REP {CMPSB,SCASB} */ +#define X86_FEATURE_FRED (12*32+17) /* "fred" Flexible Return and Event Delivery */ +#define X86_FEATURE_LKGS (12*32+18) /* Like MOV_GS except MSR_KERNEL_GS_BASE = GS.base */ +#define X86_FEATURE_WRMSRNS (12*32+19) /* Non-serializing WRMSR */ +#define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */ +#define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */ +#define X86_FEATURE_LAM (12*32+26) /* "lam" Linear Address Masking */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ -#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ -#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ -#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ -#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */ -#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ -#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ -#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */ -#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ -#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ -#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. 
*/ +#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ +#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ +#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instructions supported */ +#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ +#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */ +#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/ +#define X86_FEATURE_EFER_LMSLE_MBZ (13*32+20) /* EFER.LMSLE must be zero */ +#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */ +#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */ +#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */ +#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* Speculative Store Bypass is fixed in hardware. */ +#define X86_FEATURE_CPPC (13*32+27) /* "cppc" Collaborative Processor Performance Control */ +#define X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */ +#define X86_FEATURE_BTC_NO (13*32+29) /* Not vulnerable to Branch Type Confusion */ +#define X86_FEATURE_AMD_IBPB_RET (13*32+30) /* IBPB clears return address predictor */ +#define X86_FEATURE_BRS (13*32+31) /* "brs" Branch Sampling available */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ -#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ -#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ -#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ -#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ -#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ -#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ -#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ -#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ -#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ +#define X86_FEATURE_DTHERM (14*32+ 0) /* "dtherm" Digital Thermal Sensor */ +#define X86_FEATURE_IDA (14*32+ 1) /* "ida" Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (14*32+ 2) /* "arat" Always Running APIC Timer */ +#define X86_FEATURE_PLN (14*32+ 4) /* "pln" Intel Power Limit Notification */ +#define X86_FEATURE_PTS (14*32+ 6) /* "pts" Intel Package Thermal Status */ +#define X86_FEATURE_HWP (14*32+ 7) /* "hwp" Intel Hardware P-states */ +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* "hwp_notify" HWP Notification */ +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* "hwp_act_window" HWP Activity Window */ +#define X86_FEATURE_HWP_EPP (14*32+10) /* "hwp_epp" HWP Energy Perf. 
Preference */ +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* "hwp_pkg_req" HWP Package Level Request */ +#define X86_FEATURE_HWP_HIGHEST_PERF_CHANGE (14*32+15) /* HWP Highest perf change */ +#define X86_FEATURE_HFI (14*32+19) /* "hfi" Hardware Feedback Interface */ /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ -#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ -#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ +#define X86_FEATURE_NPT (15*32+ 0) /* "npt" Nested Page Table support */ +#define X86_FEATURE_LBRV (15*32+ 1) /* "lbrv" LBR Virtualization support */ #define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ #define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ #define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ #define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ -#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ -#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ -#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ -#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ -#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ -#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ -#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* "flushbyasid" Flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* "decodeassists" Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* "pausefilter" Filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* "pfthreshold" Pause filter threshold */ +#define X86_FEATURE_AVIC (15*32+13) /* "avic" Virtual Interrupt Controller */ +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* "v_vmsave_vmload" Virtual VMSAVE VMLOAD */ +#define X86_FEATURE_VGIF (15*32+16) /* "vgif" Virtual GIF */ +#define X86_FEATURE_X2AVIC (15*32+18) /* "x2avic" Virtual x2apic */ +#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */ +#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */ +#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */ +#define X86_FEATURE_BUS_LOCK_THRESHOLD (15*32+29) /* Bus lock threshold */ +#define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ -#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ -#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ -#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ -#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ -#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ -#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ -#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ -#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ -#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ -#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ -#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */ -#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_LA57 (16*32+16) /* 5-level page 
tables */ -#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ -#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ -#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ -#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ - -/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ -#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ -#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ -#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ +#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/ +#define X86_FEATURE_UMIP (16*32+ 2) /* "umip" User Mode Instruction Protection */ +#define X86_FEATURE_PKU (16*32+ 3) /* "pku" Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (16*32+ 4) /* "ospke" OS Protection Keys Enable */ +#define X86_FEATURE_WAITPKG (16*32+ 5) /* "waitpkg" UMONITOR/UMWAIT/TPAUSE Instructions */ +#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* "avx512_vbmi2" Additional AVX512 Vector Bit Manipulation Instructions */ +#define X86_FEATURE_SHSTK (16*32+ 7) /* Shadow stack */ +#define X86_FEATURE_GFNI (16*32+ 8) /* "gfni" Galois Field New Instructions */ +#define X86_FEATURE_VAES (16*32+ 9) /* "vaes" Vector AES */ +#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* "vpclmulqdq" Carry-Less Multiplication Double Quadword */ +#define X86_FEATURE_AVX512_VNNI (16*32+11) /* "avx512_vnni" Vector Neural Network Instructions */ +#define X86_FEATURE_AVX512_BITALG (16*32+12) /* "avx512_bitalg" Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ +#define X86_FEATURE_TME (16*32+13) /* "tme" Intel Total Memory Encryption */ +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* "avx512_vpopcntdq" POPCNT for vectors of DW/QW */ +#define X86_FEATURE_LA57 (16*32+16) /* "la57" 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* "rdpid" RDPID instruction */ +#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* "bus_lock_detect" Bus Lock detect */ +#define X86_FEATURE_CLDEMOTE (16*32+25) /* "cldemote" CLDEMOTE instruction */ +#define X86_FEATURE_MOVDIRI (16*32+27) /* "movdiri" MOVDIRI instruction */ +#define X86_FEATURE_MOVDIR64B (16*32+28) /* "movdir64b" MOVDIR64B instruction */ +#define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */ +#define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */ + +/* + * Linux-defined word for use with scattered/synthetic bits. 
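+ * (Word 17 currently holds the AMD RAS bits from CPUID 0x80000007 EBX: overflow recovery, SUCCOR and SMCA.)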
+ */ +#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */ +#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */ + +#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ -#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ -#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ -#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ -#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ -#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ -#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ -#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* "avx512_4vnniw" AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* "avx512_4fmaps" AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_FSRM (18*32+ 4) /* "fsrm" Fast Short Rep Mov */ +#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* "avx512_vp2intersect" AVX-512 Intersect for D/Q */ +#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* SRBDS mitigation MSR available */ +#define X86_FEATURE_MD_CLEAR (18*32+10) /* "md_clear" VERW clears CPU buffers */ +#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* RTM transaction always aborts */ +#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* TSX_FORCE_ABORT */ +#define X86_FEATURE_SERIALIZE (18*32+14) /* "serialize" SERIALIZE instruction */ +#define X86_FEATURE_HYBRID_CPU (18*32+15) /* This part has CPUs of more than one type */ +#define X86_FEATURE_TSXLDTRK (18*32+16) /* "tsxldtrk" TSX Suspend Load Address Tracking */ +#define X86_FEATURE_PCONFIG (18*32+18) /* "pconfig" Intel PCONFIG */ +#define X86_FEATURE_ARCH_LBR (18*32+19) /* "arch_lbr" Intel ARCH LBR */ +#define X86_FEATURE_IBT (18*32+20) /* "ibt" Indirect Branch Tracking */ +#define X86_FEATURE_AMX_BF16 (18*32+22) /* "amx_bf16" AMX bf16 Support */ +#define X86_FEATURE_AVX512_FP16 (18*32+23) /* "avx512_fp16" AVX512 FP16 */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* "amx_tile" AMX tile Support */ +#define X86_FEATURE_AMX_INT8 (18*32+25) /* "amx_int8" AMX int8 Support */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_FLUSH_L1D (18*32+28) /* "flush_l1d" Flush L1D cache */ +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* "arch_capabilities" IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* IA32_CORE_CAPABILITIES MSR */ +#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */ + +/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ +#define X86_FEATURE_SME (19*32+ 0) /* "sme" Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* "sev" Secure Encrypted Virtualization */ +#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure 
Nested Paging */ +#define X86_FEATURE_SNP_SECURE_TSC (19*32+ 8) /* SEV-SNP Secure TSC */ +#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */ +#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */ +#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */ +#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */ +#define X86_FEATURE_ALLOWED_SEV_FEATURES (19*32+27) /* Allowed SEV Features */ +#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ +#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */ + +/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ +#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ +#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ +#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */ +#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */ + +#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ +#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ + +#define X86_FEATURE_GP_ON_USER_CPUID (20*32+17) /* User CPUID faulting */ + +#define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */ +#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ +#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ +#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ +#define X86_FEATURE_SRSO_USER_KERNEL_NO (20*32+30) /* CPU is not affected by SRSO across user/kernel boundaries */ +#define X86_FEATURE_SRSO_BP_SPEC_REDUCE (20*32+31) /* + * BP_CFG[BpSpecReduce] can be used to mitigate SRSO for VMs. + * (SRSO_MSR_FIX in the official doc). + */ + +/* + * Extended auxiliary flags: Linux defined - for features scattered in various + * CPUID levels like 0x80000022, etc and Linux defined features. + * + * Reuse free bits when adding new feature flags! 
+ */ +#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* "amd_lbr_pmc_freeze" AMD LBR and PMC Freeze */ +#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */ +#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ +#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ +#define X86_FEATURE_CLEAR_BHB_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_AMD_FAST_CPPC (21*32+ 5) /* Fast CPPC */ +#define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */ +#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */ +#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ +#define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */ +#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */ +#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */ +#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ +#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ +#define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */ +#define X86_FEATURE_ABMC (21*32+15) /* Assignable Bandwidth Monitoring Counters */ +#define X86_FEATURE_MSR_IMM (21*32+16) /* MSR immediate form instructions */ +#define X86_FEATURE_SGX_EUPDATESVN (21*32+17) /* Support for ENCLS[EUPDATESVN] instruction */ + +#define X86_FEATURE_SDCIAE (21*32+18) /* L3 Smart Data Cache Injection Allocation Enforcement */ +#define X86_FEATURE_CLEAR_CPU_BUF_VM_MMIO (21*32+19) /* + * Clear CPU buffers before VM-Enter if the vCPU + * can access host MMIO (ignored for all intents + * and purposes if CLEAR_CPU_BUF_VM is set). + */ +#define X86_FEATURE_X2AVIC_EXT (21*32+20) /* AMD SVM x2AVIC support for 4k vCPUs */ /* * BUG word(s) */ #define X86_BUG(x) (NCAPINTS*32 + (x)) -#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ -#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ -#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ +#define X86_BUG_F00F X86_BUG(0) /* "f00f" Intel F00F */ +#define X86_BUG_FDIV X86_BUG(1) /* "fdiv" FPU FDIV */ +#define X86_BUG_COMA X86_BUG(2) /* "coma" Cyrix 6x86 coma */ #define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ #define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ -#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ -#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ -#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ -#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ +#define X86_BUG_11AP X86_BUG(5) /* "11ap" Bad local APIC aka 11AP */ +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* "fxsave_leak" FXSAVE leaks FOP/FIP/FOP */ +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* "clflush_monitor" AAI65, CLFLUSH required before MONITOR */ +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* "sysret_ss_attrs" SYSRET doesn't fix up SS attrs */ #ifdef CONFIG_X86_32 /* * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional * to avoid confusion. 
*/ -#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ +#define X86_BUG_ESPFIX X86_BUG(9) /* IRET to 16-bit SS corrupts ESP/RSP high bits */ #endif -#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ -#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ -#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ -#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ -#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ -#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ -#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ -#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ -#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ +#define X86_BUG_NULL_SEG X86_BUG(10) /* "null_seg" Nulling a selector preserves the base */ +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* "swapgs_fence" SWAPGS without input dep on GS */ +#define X86_BUG_MONITOR X86_BUG(12) /* "monitor" IPI required to wake up remote CPU */ +#define X86_BUG_AMD_E400 X86_BUG(13) /* "amd_e400" CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* "cpu_meltdown" CPU is affected by meltdown attack and needs kernel page table isolation */ +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* "spectre_v1" CPU is affected by Spectre variant 1 attack with conditional branches */ +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* "spectre_v2" CPU is affected by Spectre variant 2 attack with indirect branches */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* "spec_store_bypass" CPU is affected by speculative store bypass attack */ +#define X86_BUG_L1TF X86_BUG(18) /* "l1tf" CPU is affected by L1 Terminal Fault */ +#define X86_BUG_MDS X86_BUG(19) /* "mds" CPU is affected by Microarchitectural data sampling */ +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* "msbds_only" CPU is only affected by the MSBDS variant of BUG_MDS */ +#define X86_BUG_SWAPGS X86_BUG(21) /* "swapgs" CPU is affected by speculation through SWAPGS */ +#define X86_BUG_TAA X86_BUG(22) /* "taa" CPU is affected by TSX Async Abort (TAA) */ +#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */ +#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */ +/* unused, was #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ +#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */ +#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */ +#define X86_BUG_GDS X86_BUG(30) /* "gds" CPU is affected by Gather Data Sampling */ +#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */ +/* BUG word 2 */ +#define X86_BUG_SRSO X86_BUG( 1*32+ 0) /* "srso" AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG( 1*32+ 1) /* "div0" AMD DIV0
speculation bug */ +#define X86_BUG_RFDS X86_BUG( 1*32+ 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */ +#define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */ +#define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ +#define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */ +#define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ +#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ +#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ +#define X86_BUG_VMSCAPE X86_BUG( 1*32+10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/cpuid/api.h b/arch/x86/include/asm/cpuid/api.h new file mode 100644 index 000000000000..44fa82e1267c --- /dev/null +++ b/arch/x86/include/asm/cpuid/api.h @@ -0,0 +1,292 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CPUID_API_H +#define _ASM_X86_CPUID_API_H + +#include <asm/cpuid/types.h> + +#include <linux/build_bug.h> +#include <linux/types.h> + +#include <asm/string.h> + +/* + * Raw CPUID accessors: + */ + +#ifdef CONFIG_X86_32 +bool cpuid_feature(void); +#else +static inline bool cpuid_feature(void) +{ + return true; +} +#endif + +static inline void native_cpuid(u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx) + : "memory"); +} + +#define NATIVE_CPUID_REG(reg) \ +static inline u32 native_cpuid_##reg(u32 op) \ +{ \ + u32 eax = op, ebx, ecx = 0, edx; \ + \ + native_cpuid(&eax, &ebx, &ecx, &edx); \ + \ + return reg; \ +} + +/* + * Native CPUID functions returning a single datum: + */ +NATIVE_CPUID_REG(eax) +NATIVE_CPUID_REG(ebx) +NATIVE_CPUID_REG(ecx) +NATIVE_CPUID_REG(edx) + +#ifdef CONFIG_PARAVIRT_XXL +# include <asm/paravirt.h> +#else +# define __cpuid native_cpuid +#endif + +/* + * Generic CPUID function + * + * Clear ECX since some CPUs (Cyrix MII) do not set or clear ECX + * resulting in stale register contents being returned. 
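+ * Callers that need a specific subleaf in ECX should use cpuid_count() below.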
+ */ +static inline void cpuid(u32 op, + u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx) +{ + *eax = op; + *ecx = 0; + __cpuid(eax, ebx, ecx, edx); +} + +/* Some CPUID calls want 'count' to be placed in ECX */ +static inline void cpuid_count(u32 op, int count, + u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx) +{ + *eax = op; + *ecx = count; + __cpuid(eax, ebx, ecx, edx); +} + +/* + * CPUID functions returning a single datum: + */ + +static inline u32 cpuid_eax(u32 op) +{ + u32 eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return eax; +} + +static inline u32 cpuid_ebx(u32 op) +{ + u32 eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return ebx; +} + +static inline u32 cpuid_ecx(u32 op) +{ + u32 eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return ecx; +} + +static inline u32 cpuid_edx(u32 op) +{ + u32 eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return edx; +} + +static inline void __cpuid_read(u32 leaf, u32 subleaf, u32 *regs) +{ + regs[CPUID_EAX] = leaf; + regs[CPUID_ECX] = subleaf; + __cpuid(regs + CPUID_EAX, regs + CPUID_EBX, regs + CPUID_ECX, regs + CPUID_EDX); +} + +#define cpuid_subleaf(leaf, subleaf, regs) { \ + static_assert(sizeof(*(regs)) == 16); \ + __cpuid_read(leaf, subleaf, (u32 *)(regs)); \ +} + +#define cpuid_leaf(leaf, regs) { \ + static_assert(sizeof(*(regs)) == 16); \ + __cpuid_read(leaf, 0, (u32 *)(regs)); \ +} + +static inline void __cpuid_read_reg(u32 leaf, u32 subleaf, + enum cpuid_regs_idx regidx, u32 *reg) +{ + u32 regs[4]; + + __cpuid_read(leaf, subleaf, regs); + *reg = regs[regidx]; +} + +#define cpuid_subleaf_reg(leaf, subleaf, regidx, reg) { \ + static_assert(sizeof(*(reg)) == 4); \ + __cpuid_read_reg(leaf, subleaf, regidx, (u32 *)(reg)); \ +} + +#define cpuid_leaf_reg(leaf, regidx, reg) { \ + static_assert(sizeof(*(reg)) == 4); \ + __cpuid_read_reg(leaf, 0, regidx, (u32 *)(reg)); \ +} + +/* + * Hypervisor-related APIs: + */ + +static __always_inline bool cpuid_function_is_indexed(u32 function) +{ + switch (function) { + case 4: + case 7: + case 0xb: + case 0xd: + case 0xf: + case 0x10: + case 0x12: + case 0x14: + case 0x17: + case 0x18: + case 0x1d: + case 0x1e: + case 0x1f: + case 0x24: + case 0x8000001d: + return true; + } + + return false; +} + +#define for_each_possible_cpuid_base_hypervisor(function) \ + for (function = 0x40000000; function < 0x40010000; function += 0x100) + +static inline u32 cpuid_base_hypervisor(const char *sig, u32 leaves) +{ + u32 base, eax, signature[3]; + + for_each_possible_cpuid_base_hypervisor(base) { + cpuid(base, &eax, &signature[0], &signature[1], &signature[2]); + + /* + * This must not compile to "call memcmp" because it's called + * from PVH early boot code before instrumentation is set up + * and memcmp() itself may be instrumented. + */ + if (!__builtin_memcmp(sig, signature, 12) && + (leaves == 0 || ((eax - base) >= leaves))) + return base; + } + + return 0; +} + +/* + * CPUID(0x2) parsing: + */ + +/** + * cpuid_leaf_0x2() - Return sanitized CPUID(0x2) register output + * @regs: Output parameter + * + * Query CPUID(0x2) and store its output in @regs. Force set any + * invalid 1-byte descriptor returned by the hardware to zero (the NULL + * cache/TLB descriptor) before returning it to the caller. + * + * Use for_each_cpuid_0x2_desc() to iterate over the register output in + * parsed form. + */ +static inline void cpuid_leaf_0x2(union leaf_0x2_regs *regs) +{ + cpuid_leaf(0x2, regs); + + /* + * All Intel CPUs must report an iteration count of 1. 
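+ * (AL, the low byte of EAX, is defined as the number of times CPUID(0x2) must be executed to obtain all descriptors; it has always been 0x01 in practice.)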
In case + * of bogus hardware, treat all returned descriptors as NULL. + */ + if (regs->desc[0] != 0x01) { + for (int i = 0; i < 4; i++) + regs->regv[i] = 0; + return; + } + + /* + * The most significant bit (MSB) of each register must be clear. + * If a register is invalid, replace its descriptors with NULL. + */ + for (int i = 0; i < 4; i++) { + if (regs->reg[i].invalid) + regs->regv[i] = 0; + } +} + +/** + * for_each_cpuid_0x2_desc() - Iterator for parsed CPUID(0x2) descriptors + * @_regs: CPUID(0x2) register output, as returned by cpuid_leaf_0x2() + * @_ptr: u8 pointer, for macro internal use only + * @_desc: Pointer to the parsed CPUID(0x2) descriptor at each iteration + * + * Loop over the 1-byte descriptors in the passed CPUID(0x2) output registers + * @_regs. Provide the parsed information for each descriptor through @_desc. + * + * To handle cache-specific descriptors, switch on @_desc->c_type. For TLB + * descriptors, switch on @_desc->t_type. + * + * Example usage for cache descriptors:: + * + * const struct leaf_0x2_table *desc; + * union leaf_0x2_regs regs; + * u8 *ptr; + * + * cpuid_leaf_0x2(®s); + * for_each_cpuid_0x2_desc(regs, ptr, desc) { + * switch (desc->c_type) { + * ... + * } + * } + */ +#define for_each_cpuid_0x2_desc(_regs, _ptr, _desc) \ + for (_ptr = &(_regs).desc[1]; \ + _ptr < &(_regs).desc[16] && (_desc = &cpuid_0x2_table[*_ptr]); \ + _ptr++) + +/* + * CPUID(0x80000006) parsing: + */ + +static inline bool cpuid_amd_hygon_has_l3_cache(void) +{ + return cpuid_edx(0x80000006); +} + +#endif /* _ASM_X86_CPUID_API_H */ diff --git a/arch/x86/include/asm/cpuid/types.h b/arch/x86/include/asm/cpuid/types.h new file mode 100644 index 000000000000..8a00364b79de --- /dev/null +++ b/arch/x86/include/asm/cpuid/types.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CPUID_TYPES_H +#define _ASM_X86_CPUID_TYPES_H + +#include <linux/build_bug.h> +#include <linux/types.h> + +/* + * Types for raw CPUID access: + */ + +struct cpuid_regs { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; +}; + +enum cpuid_regs_idx { + CPUID_EAX = 0, + CPUID_EBX, + CPUID_ECX, + CPUID_EDX, +}; + +#define CPUID_LEAF_MWAIT 0x05 +#define CPUID_LEAF_DCA 0x09 +#define CPUID_LEAF_XSTATE 0x0d +#define CPUID_LEAF_TSC 0x15 +#define CPUID_LEAF_FREQ 0x16 +#define CPUID_LEAF_TILE 0x1d + +/* + * Types for CPUID(0x2) parsing: + */ + +struct leaf_0x2_reg { + u32 : 31, + invalid : 1; +}; + +union leaf_0x2_regs { + struct leaf_0x2_reg reg[4]; + u32 regv[4]; + u8 desc[16]; +}; + +/* + * Leaf 0x2 1-byte descriptors' cache types + * To be used for their mappings at cpuid_0x2_table[] + * + * Start at 1 since type 0 is reserved for HW byte descriptors which are + * not recognized by the kernel; i.e., those without an explicit mapping. + */ +enum _cache_table_type { + CACHE_L1_INST = 1, + CACHE_L1_DATA, + CACHE_L2, + CACHE_L3 + /* Adjust __TLB_TABLE_TYPE_BEGIN before adding more types */ +} __packed; +#ifndef __CHECKER__ +static_assert(sizeof(enum _cache_table_type) == 1); +#endif + +/* + * Ensure that leaf 0x2 cache and TLB type values do not intersect, + * since they share the same type field at struct cpuid_0x2_table. 
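+ * (Hence __TLB_TABLE_TYPE_BEGIN below starts the TLB enum one past the last cache type.)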
+ */ +#define __TLB_TABLE_TYPE_BEGIN (CACHE_L3 + 1) + +/* + * Leaf 0x2 1-byte descriptors' TLB types + * To be used for their mappings at cpuid_0x2_table[] + */ +enum _tlb_table_type { + TLB_INST_4K = __TLB_TABLE_TYPE_BEGIN, + TLB_INST_4M, + TLB_INST_2M_4M, + TLB_INST_ALL, + + TLB_DATA_4K, + TLB_DATA_4M, + TLB_DATA_2M_4M, + TLB_DATA_4K_4M, + TLB_DATA_1G, + TLB_DATA_1G_2M_4M, + + TLB_DATA0_4K, + TLB_DATA0_4M, + TLB_DATA0_2M_4M, + + STLB_4K, + STLB_4K_2M, +} __packed; +#ifndef __CHECKER__ +static_assert(sizeof(enum _tlb_table_type) == 1); +#endif + +/* + * Combined parsing table for leaf 0x2 cache and TLB descriptors. + */ + +struct leaf_0x2_table { + union { + enum _cache_table_type c_type; + enum _tlb_table_type t_type; + }; + union { + short c_size; + short entries; + }; +}; + +extern const struct leaf_0x2_table cpuid_0x2_table[256]; + +/* + * All of leaf 0x2's one-byte TLB descriptors imply the same number of entries + * for their respective TLB types. TLB descriptor 0x63 is an exception: it + * implies 4 dTLB entries for 1GB pages and 32 dTLB entries for 2MB or 4MB pages. + * + * Encode that descriptor's dTLB entry count for 2MB/4MB pages here, as the entry + * count for dTLB 1GB pages is already encoded in the cpuid_0x2_table[] mapping. + */ +#define TLB_0x63_2M_4M_ENTRIES 32 + +#endif /* _ASM_X86_CPUID_TYPES_H */ diff --git a/arch/x86/include/asm/cpuidle_haltpoll.h b/arch/x86/include/asm/cpuidle_haltpoll.h new file mode 100644 index 000000000000..c8b39c6716ff --- /dev/null +++ b/arch/x86/include/asm/cpuidle_haltpoll.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ARCH_HALTPOLL_H +#define _ARCH_HALTPOLL_H + +void arch_haltpoll_enable(unsigned int cpu); +void arch_haltpoll_disable(unsigned int cpu); + +#endif diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index 6722ffcef2e6..9df9e9cde670 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -1,15 +1,40 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_CPUMASK_H #define _ASM_X86_CPUMASK_H -#ifndef __ASSEMBLY__ -#include <linux/cpumask.h> +#ifndef __ASSEMBLER__ -extern cpumask_var_t cpu_callin_mask; -extern cpumask_var_t cpu_callout_mask; -extern cpumask_var_t cpu_initialized_mask; -extern cpumask_var_t cpu_sibling_setup_mask; +#include <linux/compiler.h> +#include <linux/cpumask.h> extern void setup_cpu_local_masks(void); -#endif /* __ASSEMBLY__ */ +/* + * NMI and MCE exceptions need cpu_is_offline() _really_ early; + * provide an arch_ special for them to avoid instrumentation.
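+ * (arch_test_bit() and arch_clear_bit() are the non-instrumented variants of the regular bitops.)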
+ */ +#if NR_CPUS > 1 +static __always_inline bool arch_cpu_online(int cpu) +{ + return arch_test_bit(cpu, cpumask_bits(cpu_online_mask)); +} + +static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp) +{ + arch_clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); +} +#else +static __always_inline bool arch_cpu_online(int cpu) +{ + return cpu == 0; +} + +static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp) +{ + return; +} +#endif + +#define arch_cpu_is_offline(cpu) unlikely(!arch_cpu_online(cpu)) + +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_CPUMASK_H */ diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h index 0acf5ee45a21..8b6bd63530dc 100644 --- a/arch/x86/include/asm/crash.h +++ b/arch/x86/include/asm/crash.h @@ -2,8 +2,9 @@ #ifndef _ASM_X86_CRASH_H #define _ASM_X86_CRASH_H +struct kimage; + int crash_load_segments(struct kimage *image); -int crash_copy_backup_region(struct kimage *image); int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params); void crash_smp_send_stop(void); diff --git a/arch/x86/include/asm/crash_reserve.h b/arch/x86/include/asm/crash_reserve.h new file mode 100644 index 000000000000..7835b2cdff04 --- /dev/null +++ b/arch/x86/include/asm/crash_reserve.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _X86_CRASH_RESERVE_H +#define _X86_CRASH_RESERVE_H + +/* 16M alignment for crash kernel regions */ +#define CRASH_ALIGN SZ_16M + +/* + * Keep the crash kernel below this limit. + * + * Earlier 32-bit kernels would limit the kernel to the low 512 MB range + * due to mapping restrictions. + * + * 64-bit kdump kernels need to be restricted to be under 64 TB, which is + * the upper limit of system RAM in 4-level paging mode. Since the kdump + * jump could be from 5-level paging to 4-level paging, the jump will fail if + * the kernel is put above 64 TB, and during the 1st kernel bootup there's + * no good way to detect the paging mode of the target kernel which will be + * loaded for dumping.
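+ *
+ * For reference, SZ_64T is 1UL << 46. A placement check derived from
+ * these limits would look like (illustrative sketch only):
+ *
+ *	if (crash_base + crash_size > CRASH_ADDR_HIGH_MAX)
+ *		return -ERANGE;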
+ */ +extern unsigned long swiotlb_size_or_default(void); + +#ifdef CONFIG_X86_32 +# define CRASH_ADDR_LOW_MAX SZ_512M +# define CRASH_ADDR_HIGH_MAX SZ_512M +#else +# define CRASH_ADDR_LOW_MAX SZ_4G +# define CRASH_ADDR_HIGH_MAX SZ_64T +#endif + +# define DEFAULT_CRASH_KERNEL_LOW_SIZE crash_low_size_default() + +static inline unsigned long crash_low_size_default(void) +{ +#ifdef CONFIG_X86_64 + return max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20); +#else + return 0; +#endif +} + +#define HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY + +#endif /* _X86_CRASH_RESERVE_H */ diff --git a/arch/x86/include/asm/crypto/aes.h b/arch/x86/include/asm/crypto/aes.h deleted file mode 100644 index c508521dd190..000000000000 --- a/arch/x86/include/asm/crypto/aes.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASM_X86_AES_H -#define ASM_X86_AES_H - -#include <linux/crypto.h> -#include <crypto/aes.h> - -void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, - const u8 *src); -void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, - const u8 *src); -#endif diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h deleted file mode 100644 index a5d86fc0593f..000000000000 --- a/arch/x86/include/asm/crypto/camellia.h +++ /dev/null @@ -1,96 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASM_X86_CAMELLIA_H -#define ASM_X86_CAMELLIA_H - -#include <crypto/b128ops.h> -#include <linux/crypto.h> -#include <linux/kernel.h> - -#define CAMELLIA_MIN_KEY_SIZE 16 -#define CAMELLIA_MAX_KEY_SIZE 32 -#define CAMELLIA_BLOCK_SIZE 16 -#define CAMELLIA_TABLE_BYTE_LEN 272 -#define CAMELLIA_PARALLEL_BLOCKS 2 - -struct crypto_skcipher; - -struct camellia_ctx { - u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)]; - u32 key_length; -}; - -struct camellia_xts_ctx { - struct camellia_ctx tweak_ctx; - struct camellia_ctx crypt_ctx; -}; - -extern int __camellia_setkey(struct camellia_ctx *cctx, - const unsigned char *key, - unsigned int key_len, u32 *flags); - -extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen); - -/* regular block cipher functions */ -asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); - -/* 2-way parallel cipher functions */ -asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); - -/* 16-way parallel cipher functions (avx/aes-ni) */ -asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); - -asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); - -asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); - -static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) -{ - __camellia_enc_blk(ctx, dst, src, false); -} - -static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) -{ - __camellia_enc_blk(ctx, dst, src, true); -} - -static 
inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) -{ - __camellia_enc_blk_2way(ctx, dst, src, false); -} - -static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) -{ - __camellia_enc_blk_2way(ctx, dst, src, true); -} - -/* glue helpers */ -extern void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src); -extern void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, - le128 *iv); -extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, - le128 *iv); - -extern void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); - -#endif /* ASM_X86_CAMELLIA_H */ diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h deleted file mode 100644 index d1818634ae7e..000000000000 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ /dev/null @@ -1,122 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Shared glue code for 128bit block ciphers - */ - -#ifndef _CRYPTO_GLUE_HELPER_H -#define _CRYPTO_GLUE_HELPER_H - -#include <crypto/internal/skcipher.h> -#include <linux/kernel.h> -#include <asm/fpu/api.h> -#include <crypto/b128ops.h> - -typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); -typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src); -typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src, - le128 *iv); -typedef void (*common_glue_xts_func_t)(void *ctx, u128 *dst, const u128 *src, - le128 *iv); - -#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn)) -#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn)) -#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn)) -#define GLUE_XTS_FUNC_CAST(fn) ((common_glue_xts_func_t)(fn)) - -struct common_glue_func_entry { - unsigned int num_blocks; /* number of blocks that @fn will process */ - union { - common_glue_func_t ecb; - common_glue_cbc_func_t cbc; - common_glue_ctr_func_t ctr; - common_glue_xts_func_t xts; - } fn_u; -}; - -struct common_glue_ctx { - unsigned int num_funcs; - int fpu_blocks_limit; /* -1 means fpu not needed at all */ - - /* - * First funcs entry must have largest num_blocks and last funcs entry - * must have num_blocks == 1! - */ - struct common_glue_func_entry funcs[]; -}; - -static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, - struct skcipher_walk *walk, - bool fpu_enabled, unsigned int nbytes) -{ - if (likely(fpu_blocks_limit < 0)) - return false; - - if (fpu_enabled) - return true; - - /* - * Vector-registers are only used when chunk to be processed is large - * enough, so do not enable FPU until it is necessary. 
- */ - if (nbytes < bsize * (unsigned int)fpu_blocks_limit) - return false; - - /* prevent sleeping if FPU is in use */ - skcipher_walk_atomise(walk); - - kernel_fpu_begin(); - return true; -} - -static inline void glue_fpu_end(bool fpu_enabled) -{ - if (fpu_enabled) - kernel_fpu_end(); -} - -static inline void le128_to_be128(be128 *dst, const le128 *src) -{ - dst->a = cpu_to_be64(le64_to_cpu(src->a)); - dst->b = cpu_to_be64(le64_to_cpu(src->b)); -} - -static inline void be128_to_le128(le128 *dst, const be128 *src) -{ - dst->a = cpu_to_le64(be64_to_cpu(src->a)); - dst->b = cpu_to_le64(be64_to_cpu(src->b)); -} - -static inline void le128_inc(le128 *i) -{ - u64 a = le64_to_cpu(i->a); - u64 b = le64_to_cpu(i->b); - - b++; - if (!b) - a++; - - i->a = cpu_to_le64(a); - i->b = cpu_to_le64(b); -} - -extern int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req); - -extern int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn, - struct skcipher_request *req); - -extern int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req); - -extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req); - -extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req, - common_glue_func_t tweak_fn, void *tweak_ctx, - void *crypt_ctx); - -extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, - le128 *iv, common_glue_func_t fn); - -#endif /* _CRYPTO_GLUE_HELPER_H */ diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h deleted file mode 100644 index db7c9cc32234..000000000000 --- a/arch/x86/include/asm/crypto/serpent-avx.h +++ /dev/null @@ -1,42 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASM_X86_SERPENT_AVX_H -#define ASM_X86_SERPENT_AVX_H - -#include <crypto/b128ops.h> -#include <crypto/serpent.h> -#include <linux/types.h> - -struct crypto_skcipher; - -#define SERPENT_PARALLEL_BLOCKS 8 - -struct serpent_xts_ctx { - struct serpent_ctx tweak_ctx; - struct serpent_ctx crypt_ctx; -}; - -asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); - -asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); - -asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); - -extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, - le128 *iv); - -extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); - -extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen); - -#endif diff --git a/arch/x86/include/asm/crypto/serpent-sse2.h b/arch/x86/include/asm/crypto/serpent-sse2.h deleted file mode 100644 index 1a345e8a7496..000000000000 --- a/arch/x86/include/asm/crypto/serpent-sse2.h +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASM_X86_SERPENT_SSE2_H -#define ASM_X86_SERPENT_SSE2_H - -#include <linux/crypto.h> -#include <crypto/serpent.h> - -#ifdef CONFIG_X86_32 - -#define SERPENT_PARALLEL_BLOCKS 4 - 
-asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); - -static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_4way(ctx, dst, src, false); -} - -static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_4way(ctx, dst, src, true); -} - -static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - serpent_dec_blk_4way(ctx, dst, src); -} - -#else - -#define SERPENT_PARALLEL_BLOCKS 8 - -asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); - -static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_8way(ctx, dst, src, false); -} - -static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_8way(ctx, dst, src, true); -} - -static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - serpent_dec_blk_8way(ctx, dst, src); -} - -#endif - -#endif diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h deleted file mode 100644 index f618bf272b90..000000000000 --- a/arch/x86/include/asm/crypto/twofish.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASM_X86_TWOFISH_H -#define ASM_X86_TWOFISH_H - -#include <linux/crypto.h> -#include <crypto/twofish.h> -#include <crypto/b128ops.h> - -/* regular block cipher functions from twofish_x86_64 module */ -asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); - -/* 3-way parallel cipher functions */ -asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); - -/* helpers from twofish_x86_64-3way module */ -extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); -extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, - le128 *iv); -extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, - le128 *iv); - -#endif /* ASM_X86_TWOFISH_H */ diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 3e204e6140b5..cc4a3f725b37 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -2,21 +2,31 @@ #ifndef _ASM_X86_CURRENT_H #define _ASM_X86_CURRENT_H +#include <linux/build_bug.h> #include <linux/compiler.h> + +#ifndef __ASSEMBLER__ + +#include <linux/cache.h> #include <asm/percpu.h> -#ifndef __ASSEMBLY__ struct task_struct; -DECLARE_PER_CPU(struct task_struct *, current_task); +DECLARE_PER_CPU_CACHE_HOT(struct task_struct *, current_task); +/* const-qualified alias provided by the linker. 
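The const
+ * qualification lets the compiler treat the value as invariant, so
+ * repeated reads of 'current' within one function can be folded into a
+ * single percpu load through this_cpu_read_const() below.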
*/ +DECLARE_PER_CPU_CACHE_HOT(struct task_struct * const __percpu_seg_override, + const_current_task); static __always_inline struct task_struct *get_current(void) { + if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT)) + return this_cpu_read_const(const_current_task); + return this_cpu_read_stable(current_task); } #define current get_current() -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_CURRENT_H */ diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 9e5ca30738e5..a2c1f2d24b64 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -2,10 +2,21 @@ #ifndef _ASM_X86_DEBUGREG_H #define _ASM_X86_DEBUGREG_H - #include <linux/bug.h> +#include <linux/percpu.h> #include <uapi/asm/debugreg.h> +#include <asm/cpufeature.h> +#include <asm/msr.h> + +/* + * Define bits that are always set to 1 in DR7; only bit 10 is + * architecturally reserved to '1'. + * + * This is also the init/reset value for DR7. + */ +#define DR7_FIXED_1 0x00000400 + DECLARE_PER_CPU(unsigned long, cpu_dr7); #ifndef CONFIG_PARAVIRT_XXL @@ -18,9 +29,9 @@ DECLARE_PER_CPU(unsigned long, cpu_dr7); native_set_debugreg(register, value) #endif -static inline unsigned long native_get_debugreg(int regno) +static __always_inline unsigned long native_get_debugreg(int regno) { - unsigned long val = 0; /* Damn you, gcc! */ + unsigned long val; switch (regno) { case 0: @@ -39,7 +50,20 @@ static inline unsigned long native_get_debugreg(int regno) asm("mov %%db6, %0" :"=r" (val)); break; case 7: - asm("mov %%db7, %0" :"=r" (val)); + /* + * Use "asm volatile" for DR7 reads to forbid re-ordering them + * with other code. + * + * This is needed because a DR7 access can cause a #VC exception + * when running under SEV-ES. Taking a #VC exception is not a + * safe thing to do just anywhere in the entry code and + * re-ordering might place the access into an unsafe location. + * + * This happened in the NMI handler, where the DR7 read was + * re-ordered to happen before the call to sev_es_ist_enter(), + * causing stack recursion. + */ + asm volatile("mov %%db7, %0" : "=r" (val)); break; default: BUG(); @@ -47,7 +71,7 @@ static inline unsigned long native_get_debugreg(int regno) return val; } -static inline void native_set_debugreg(int regno, unsigned long value) +static __always_inline void native_set_debugreg(int regno, unsigned long value) { switch (regno) { case 0: @@ -66,7 +90,16 @@ static inline void native_set_debugreg(int regno, unsigned long value) asm("mov %0, %%db6" ::"r" (value)); break; case 7: - asm("mov %0, %%db7" ::"r" (value)); + /* + * Use "asm volatile" for DR7 writes to forbid re-ordering them + * with other code. + * + * While it didn't happen with a DR7 write (see the DR7 read + * comment above which explains where it happened), add the + * "asm volatile" here too to avoid similar problems in the + * future.
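+ *
+ * The same ordering guarantee is what makes the local_db_save() and
+ * local_db_restore() helpers below safe to pair around a #DB-sensitive
+ * critical section; a sketch of that pattern:
+ *
+ *	unsigned long dr7 = local_db_save();
+ *	...	(hardware breakpoints are disabled here)
+ *	local_db_restore(dr7);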
+ */ + asm volatile("mov %0, %%db7" ::"r" (value)); break; default: BUG(); @@ -75,8 +108,8 @@ static inline void native_set_debugreg(int regno, unsigned long value) static inline void hw_breakpoint_disable(void) { - /* Zero the control register for HW Breakpoint */ - set_debugreg(0UL, 7); + /* Reset the control register for HW Breakpoint */ + set_debugreg(DR7_FIXED_1, 7); /* Zero-out the individual HW breakpoint address registers */ set_debugreg(0UL, 0); @@ -85,40 +118,80 @@ static inline void hw_breakpoint_disable(void) set_debugreg(0UL, 3); } -static inline int hw_breakpoint_active(void) +static __always_inline bool hw_breakpoint_active(void) { return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; } -extern void aout_dump_debugregs(struct user *dump); - extern void hw_breakpoint_restore(void); -#ifdef CONFIG_X86_64 -DECLARE_PER_CPU(int, debug_stack_usage); -static inline void debug_stack_usage_inc(void) +static __always_inline unsigned long local_db_save(void) { - __this_cpu_inc(debug_stack_usage); + unsigned long dr7; + + if (static_cpu_has(X86_FEATURE_HYPERVISOR) && !hw_breakpoint_active()) + return 0; + + get_debugreg(dr7, 7); + + /* Architecturally set bit */ + dr7 &= ~DR7_FIXED_1; + if (dr7) + set_debugreg(DR7_FIXED_1, 7); + + /* + * Ensure the compiler doesn't lower the above statements into + * the critical section; disabling breakpoints late would not + * be good. + */ + barrier(); + + return dr7; } -static inline void debug_stack_usage_dec(void) + +static __always_inline void local_db_restore(unsigned long dr7) { - __this_cpu_dec(debug_stack_usage); + /* + * Ensure the compiler doesn't raise this statement into + * the critical section; enabling breakpoints early would + * not be good. + */ + barrier(); + if (dr7) + set_debugreg(dr7, 7); } -int is_debug_stack(unsigned long addr); -void debug_stack_set_zero(void); -void debug_stack_reset(void); -#else /* !X86_64 */ -static inline int is_debug_stack(unsigned long addr) { return 0; } -static inline void debug_stack_set_zero(void) { } -static inline void debug_stack_reset(void) { } -static inline void debug_stack_usage_inc(void) { } -static inline void debug_stack_usage_dec(void) { } -#endif /* X86_64 */ #ifdef CONFIG_CPU_SUP_AMD -extern void set_dr_addr_mask(unsigned long mask, int dr); +extern void amd_set_dr_addr_mask(unsigned long mask, unsigned int dr); +extern unsigned long amd_get_dr_addr_mask(unsigned int dr); #else -static inline void set_dr_addr_mask(unsigned long mask, int dr) { } +static inline void amd_set_dr_addr_mask(unsigned long mask, unsigned int dr) { } +static inline unsigned long amd_get_dr_addr_mask(unsigned int dr) +{ + return 0; +} +#endif + +static inline unsigned long get_debugctlmsr(void) +{ + unsigned long debugctlmsr = 0; + +#ifndef CONFIG_X86_DEBUGCTLMSR + if (boot_cpu_data.x86 < 6) + return 0; #endif + rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctlmsr); + + return debugctlmsr; +} + +static inline void update_debugctlmsr(unsigned long debugctlmsr) +{ +#ifndef CONFIG_X86_DEBUGCTLMSR + if (boot_cpu_data.x86 < 6) + return; +#endif + wrmsrq(MSR_IA32_DEBUGCTLMSR, debugctlmsr); +} #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h index de9e7841f953..630891d25819 100644 --- a/arch/x86/include/asm/delay.h +++ b/arch/x86/include/asm/delay.h @@ -3,8 +3,10 @@ #define _ASM_X86_DELAY_H #include <asm-generic/delay.h> +#include <linux/init.h> -void use_tsc_delay(void); +void __init use_tsc_delay(void); +void __init use_tpause_delay(void); void use_mwaitx_delay(void); 
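+
+/*
+ * Which implementation runs is chosen once during boot; presumably
+ * use_tsc_delay() once the TSC is calibrated, and use_tpause_delay()
+ * only when X86_FEATURE_WAITPKG is available (the call sites live in
+ * the delay/TSC setup code, not in this header).
+ */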
#endif /* _ASM_X86_DELAY_H */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 68a99d2a5f33..ec95fe44fa3a 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -9,6 +9,7 @@ #include <asm/irq_vectors.h> #include <asm/cpu_entry_area.h> +#include <linux/debug_locks.h> #include <linux/smp.h> #include <linux/percpu.h> @@ -40,11 +41,6 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in desc->l = 0; } -extern struct desc_ptr idt_descr; -extern gate_desc idt_table[]; -extern const struct desc_ptr debug_idt_descr; -extern gate_desc debug_idt_table[]; - struct gdt_page { struct desc_struct gdt[GDT_ENTRIES]; } __attribute__((aligned(PAGE_SIZE))); @@ -214,7 +210,7 @@ static inline void native_load_gdt(const struct desc_ptr *dtr) asm volatile("lgdt %0"::"m" (*dtr)); } -static inline void native_load_idt(const struct desc_ptr *dtr) +static __always_inline void native_load_idt(const struct desc_ptr *dtr) { asm volatile("lidt %0"::"m" (*dtr)); } @@ -229,6 +225,26 @@ static inline void store_idt(struct desc_ptr *dtr) asm volatile("sidt %0":"=m" (*dtr)); } +static inline void native_gdt_invalidate(void) +{ + const struct desc_ptr invalid_gdt = { + .address = 0, + .size = 0 + }; + + native_load_gdt(&invalid_gdt); +} + +static inline void native_idt_invalidate(void) +{ + const struct desc_ptr invalid_idt = { + .address = 0, + .size = 0 + }; + + native_load_idt(&invalid_idt); +} + /* * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is * a read-only remapping. To prevent a page fault, the GDT is switched to the @@ -386,66 +402,48 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) desc->limit1 = (limit >> 16) & 0xf; } -void update_intr_gate(unsigned int n, const void *addr); -void alloc_intr_gate(unsigned int n, const void *addr); - -extern unsigned long system_vectors[]; - -#ifdef CONFIG_X86_64 -DECLARE_PER_CPU(u32, debug_idt_ctr); -static inline bool is_debug_idt_enabled(void) +static inline void init_idt_data(struct idt_data *data, unsigned int n, + const void *addr) { - if (this_cpu_read(debug_idt_ctr)) - return true; + BUG_ON(n > 0xFF); - return false; + memset(data, 0, sizeof(*data)); + data->vector = n; + data->addr = addr; + data->segment = __KERNEL_CS; + data->bits.type = GATE_INTERRUPT; + data->bits.p = 1; } -static inline void load_debug_idt(void) -{ - load_idt((const struct desc_ptr *)&debug_idt_descr); -} -#else -static inline bool is_debug_idt_enabled(void) +static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d) { - return false; -} + unsigned long addr = (unsigned long) d->addr; -static inline void load_debug_idt(void) -{ -} + gate->offset_low = (u16) addr; + gate->segment = (u16) d->segment; + gate->bits = d->bits; + gate->offset_middle = (u16) (addr >> 16); +#ifdef CONFIG_X86_64 + gate->offset_high = (u32) (addr >> 32); + gate->reserved = 0; #endif - -/* - * The load_current_idt() must be called with interrupts disabled - * to avoid races. That way the IDT will always be set back to the expected - * descriptor. It's also called when a CPU is being initialized, and - * that doesn't need to disable interrupts, as nothing should be - * bothering the CPU then. 
- */ -static inline void load_current_idt(void) -{ - if (is_debug_idt_enabled()) - load_debug_idt(); - else - load_idt((const struct desc_ptr *)&idt_descr); } +extern unsigned long system_vectors[]; + +extern void load_current_idt(void); extern void idt_setup_early_handler(void); extern void idt_setup_early_traps(void); extern void idt_setup_traps(void); extern void idt_setup_apic_and_irq_gates(void); +extern bool idt_is_f00f_address(unsigned long address); #ifdef CONFIG_X86_64 extern void idt_setup_early_pf(void); -extern void idt_setup_ist_traps(void); -extern void idt_setup_debugidt_traps(void); #else static inline void idt_setup_early_pf(void) { } -static inline void idt_setup_ist_traps(void) { } -static inline void idt_setup_debugidt_traps(void) { } #endif -extern void idt_invalidate(void *addr); +extern void idt_invalidate(void); #endif /* _ASM_X86_DESC_H */ diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index a91f3b6e4f2a..7e6b9314758a 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -8,7 +8,57 @@ * archs. */ -#ifndef __ASSEMBLY__ +/* + * Low-level interface mapping flags/field names to bits + */ + +/* Flags for _DESC_S (non-system) descriptors */ +#define _DESC_ACCESSED 0x0001 +#define _DESC_DATA_WRITABLE 0x0002 +#define _DESC_CODE_READABLE 0x0002 +#define _DESC_DATA_EXPAND_DOWN 0x0004 +#define _DESC_CODE_CONFORMING 0x0004 +#define _DESC_CODE_EXECUTABLE 0x0008 + +/* Common flags */ +#define _DESC_S 0x0010 +#define _DESC_DPL(dpl) ((dpl) << 5) +#define _DESC_PRESENT 0x0080 + +#define _DESC_LONG_CODE 0x2000 +#define _DESC_DB 0x4000 +#define _DESC_GRANULARITY_4K 0x8000 + +/* System descriptors have a numeric "type" field instead of flags */ +#define _DESC_SYSTEM(code) (code) + +/* + * High-level interface mapping intended usage to low-level combinations + * of flags + */ + +#define _DESC_DATA (_DESC_S | _DESC_PRESENT | _DESC_ACCESSED | \ + _DESC_DATA_WRITABLE) +#define _DESC_CODE (_DESC_S | _DESC_PRESENT | _DESC_ACCESSED | \ + _DESC_CODE_READABLE | _DESC_CODE_EXECUTABLE) + +#define DESC_DATA16 (_DESC_DATA) +#define DESC_CODE16 (_DESC_CODE) + +#define DESC_DATA32 (_DESC_DATA | _DESC_GRANULARITY_4K | _DESC_DB) +#define DESC_DATA32_BIOS (_DESC_DATA | _DESC_DB) + +#define DESC_CODE32 (_DESC_CODE | _DESC_GRANULARITY_4K | _DESC_DB) +#define DESC_CODE32_BIOS (_DESC_CODE | _DESC_DB) + +#define DESC_TSS32 (_DESC_SYSTEM(9) | _DESC_PRESENT) + +#define DESC_DATA64 (_DESC_DATA | _DESC_GRANULARITY_4K | _DESC_DB) +#define DESC_CODE64 (_DESC_CODE | _DESC_GRANULARITY_4K | _DESC_LONG_CODE) + +#define DESC_USER (_DESC_DPL(3)) + +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -22,19 +72,19 @@ struct desc_struct { #define GDT_ENTRY_INIT(flags, base, limit) \ { \ - .limit0 = (u16) (limit), \ - .limit1 = ((limit) >> 16) & 0x0F, \ - .base0 = (u16) (base), \ - .base1 = ((base) >> 16) & 0xFF, \ - .base2 = ((base) >> 24) & 0xFF, \ - .type = (flags & 0x0f), \ - .s = (flags >> 4) & 0x01, \ - .dpl = (flags >> 5) & 0x03, \ - .p = (flags >> 7) & 0x01, \ - .avl = (flags >> 12) & 0x01, \ - .l = (flags >> 13) & 0x01, \ - .d = (flags >> 14) & 0x01, \ - .g = (flags >> 15) & 0x01, \ + .limit0 = ((limit) >> 0) & 0xFFFF, \ + .limit1 = ((limit) >> 16) & 0x000F, \ + .base0 = ((base) >> 0) & 0xFFFF, \ + .base1 = ((base) >> 16) & 0x00FF, \ + .base2 = ((base) >> 24) & 0x00FF, \ + .type = ((flags) >> 0) & 0x000F, \ + .s = ((flags) >> 4) & 0x0001, \ + .dpl = ((flags) >> 5) & 0x0003, \ + .p = ((flags) >> 7) & 0x0001, \ + .avl = ((flags) >> 12) & 0x0001, \ + 
.l = ((flags) >> 13) & 0x0001, \ + .d = ((flags) >> 14) & 0x0001, \ + .g = ((flags) >> 15) & 0x0001, \ } enum { @@ -74,6 +124,13 @@ struct idt_bits { p : 1; } __attribute__((packed)); +struct idt_data { + unsigned int vector; + unsigned int segment; + struct idt_bits bits; + const void *addr; +}; + struct gate_struct { u16 offset_low; u16 segment; @@ -87,6 +144,7 @@ struct gate_struct { typedef struct gate_struct gate_desc; +#ifndef _SETUP static inline unsigned long gate_offset(const gate_desc *g) { #ifdef CONFIG_X86_64 @@ -101,13 +159,17 @@ static inline unsigned long gate_segment(const gate_desc *g) { return g->segment; } +#endif struct desc_ptr { unsigned short size; unsigned long address; } __attribute__((packed)) ; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ + +/* Boot IDT definitions */ +#define BOOT_IDT_ENTRIES 32 /* Access rights as returned by LAR */ #define AR_TYPE_RODATA (0 * (1 << 9)) diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index a8f6c809d9b1..7c0a52ca2f4d 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -3,24 +3,8 @@ #define _ASM_X86_DEVICE_H struct dev_archdata { -#if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU) - void *iommu; /* hook for IOMMU specific extension */ -#endif -#ifdef CONFIG_STA2X11 - bool is_sta2x11; -#endif }; -#if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS) -struct dma_domain { - struct list_head node; - const struct dma_map_ops *dma_ops; - int domain_nr; -}; -void add_dma_domain(struct dma_domain *domain); -void del_dma_domain(struct dma_domain *domain); -#endif - struct pdev_archdata { }; diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h deleted file mode 100644 index a5ea841cc6d2..000000000000 --- a/arch/x86/include/asm/disabled-features.h +++ /dev/null @@ -1,89 +0,0 @@ -#ifndef _ASM_X86_DISABLED_FEATURES_H -#define _ASM_X86_DISABLED_FEATURES_H - -/* These features, although they might be available in a CPU - * will not be used because the compile options to support - * them are not present. - * - * This code allows them to be checked and disabled at - * compile time without an explicit #ifdef. Use - * cpu_feature_enabled(). 
- */ - -#ifdef CONFIG_X86_INTEL_MPX -# define DISABLE_MPX 0 -#else -# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31)) -#endif - -#ifdef CONFIG_X86_SMAP -# define DISABLE_SMAP 0 -#else -# define DISABLE_SMAP (1<<(X86_FEATURE_SMAP & 31)) -#endif - -#ifdef CONFIG_X86_INTEL_UMIP -# define DISABLE_UMIP 0 -#else -# define DISABLE_UMIP (1<<(X86_FEATURE_UMIP & 31)) -#endif - -#ifdef CONFIG_X86_64 -# define DISABLE_VME (1<<(X86_FEATURE_VME & 31)) -# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) -# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31)) -# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31)) -# define DISABLE_PCID 0 -#else -# define DISABLE_VME 0 -# define DISABLE_K6_MTRR 0 -# define DISABLE_CYRIX_ARR 0 -# define DISABLE_CENTAUR_MCR 0 -# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31)) -#endif /* CONFIG_X86_64 */ - -#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -# define DISABLE_PKU 0 -# define DISABLE_OSPKE 0 -#else -# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31)) -# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31)) -#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */ - -#ifdef CONFIG_X86_5LEVEL -# define DISABLE_LA57 0 -#else -# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) -#endif - -#ifdef CONFIG_PAGE_TABLE_ISOLATION -# define DISABLE_PTI 0 -#else -# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) -#endif - -/* - * Make sure to add features to the correct mask - */ -#define DISABLED_MASK0 (DISABLE_VME) -#define DISABLED_MASK1 0 -#define DISABLED_MASK2 0 -#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR) -#define DISABLED_MASK4 (DISABLE_PCID) -#define DISABLED_MASK5 0 -#define DISABLED_MASK6 0 -#define DISABLED_MASK7 (DISABLE_PTI) -#define DISABLED_MASK8 0 -#define DISABLED_MASK9 (DISABLE_MPX|DISABLE_SMAP) -#define DISABLED_MASK10 0 -#define DISABLED_MASK11 0 -#define DISABLED_MASK12 0 -#define DISABLED_MASK13 0 -#define DISABLED_MASK14 0 -#define DISABLED_MASK15 0 -#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP) -#define DISABLED_MASK17 0 -#define DISABLED_MASK18 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) - -#endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h index 20a46150e0a8..30fd06ede751 100644 --- a/arch/x86/include/asm/div64.h +++ b/arch/x86/include/asm/div64.h @@ -60,6 +60,12 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) } #define div_u64_rem div_u64_rem +/* + * gcc tends to zero extend 32bit values and do full 64bit maths. + * Define asm functions that avoid this. + * (clang generates better code for the C versions.) + */ +#ifndef __clang__ static inline u64 mul_u32_u32(u32 a, u32 b) { u32 high, low; @@ -71,8 +77,54 @@ static inline u64 mul_u32_u32(u32 a, u32 b) } #define mul_u32_u32 mul_u32_u32 +static inline u64 add_u64_u32(u64 a, u32 b) +{ + u32 high = a >> 32, low = a; + + asm ("addl %[b], %[low]; adcl $0, %[high]" + : [low] "+r" (low), [high] "+r" (high) + : [b] "rm" (b) ); + + return low | (u64)high << 32; +} +#define add_u64_u32 add_u64_u32 +#endif + +/* + * __div64_32() is never called on x86, so prevent the + * generic definition from getting built. + */ +#define __div64_32 + #else # include <asm-generic/div64.h> + +/* + * Will generate an #DE when the result doesn't fit u64, could fix with an + * __ex_table[] entry when it becomes an issue. 
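+ *
+ * Concretely: DIVQ raises #DE when the 128-bit dividend divided by @div
+ * does not fit in 64 bits, e.g. mul_u64_add_u64_div_u64(~0ULL, 2, 0, 1)
+ * would fault because the quotient (2 * U64_MAX) overflows u64.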
+ */ +static inline u64 mul_u64_add_u64_div_u64(u64 rax, u64 mul, u64 add, u64 div) +{ + u64 rdx; + + asm ("mulq %[mul]" : "+a" (rax), "=d" (rdx) : [mul] "rm" (mul)); + + if (!statically_true(!add)) + asm ("addq %[add], %[lo]; adcq $0, %[hi]" : + [lo] "+r" (rax), [hi] "+r" (rdx) : [add] "irm" (add)); + + asm ("divq %[div]" : "+a" (rax), "+d" (rdx) : [div] "rm" (div)); + + return rax; +} +#define mul_u64_add_u64_div_u64 mul_u64_add_u64_div_u64 + +static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div) +{ + return mul_u64_add_u64_div_u64(a, mul, 0, div); +} +#define mul_u64_u32_div mul_u64_u32_div + #endif /* CONFIG_X86_32 */ #endif /* _ASM_X86_DIV64_H */ diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h deleted file mode 100644 index 1a19251eaac9..000000000000 --- a/arch/x86/include/asm/dma-direct.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASM_X86_DMA_DIRECT_H -#define ASM_X86_DMA_DIRECT_H 1 - -bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr); -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr); - -#endif /* ASM_X86_DMA_DIRECT_H */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index ce4d176b3d13..d1dac96ee30b 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -2,35 +2,11 @@ #ifndef _ASM_X86_DMA_MAPPING_H #define _ASM_X86_DMA_MAPPING_H -/* - * IOMMU interface. See Documentation/DMA-API-HOWTO.txt and - * Documentation/DMA-API.txt for documentation. - */ - -#include <linux/scatterlist.h> -#include <linux/dma-debug.h> -#include <asm/io.h> -#include <asm/swiotlb.h> -#include <linux/dma-contiguous.h> - -#ifdef CONFIG_ISA -# define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) -#else -# define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) -#endif - -extern int iommu_merge; -extern struct device x86_dma_fallback_dev; -extern int panic_on_overflow; - extern const struct dma_map_ops *dma_ops; -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) +static inline const struct dma_map_ops *get_arch_dma_ops(void) { return dma_ops; } -bool arch_dma_alloc_attrs(struct device **dev); -#define arch_dma_alloc_attrs arch_dma_alloc_attrs - #endif diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h index 00f7cf45e699..8ae6e0e11b8b 100644 --- a/arch/x86/include/asm/dma.h +++ b/arch/x86/include/asm/dma.h @@ -74,7 +74,7 @@ #define MAX_DMA_PFN ((16UL * 1024 * 1024) >> PAGE_SHIFT) /* 4GB broken PCI/AGP hardware bus master zone */ -#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT) +#define MAX_DMA32_PFN (1UL << (32 - PAGE_SHIFT)) #ifdef CONFIG_X86_32 /* The maximum address that we can perform a DMA transfer to on this platform */ @@ -307,12 +307,4 @@ extern int request_dma(unsigned int dmanr, const char *device_id); extern void free_dma(unsigned int dmanr); #endif -/* From PCI */ - -#ifdef CONFIG_PCI -extern int isa_dma_bridge_buggy; -#else -#define isa_dma_bridge_buggy (0) -#endif - #endif /* _ASM_X86_DMA_H */ diff --git a/arch/x86/include/asm/doublefault.h b/arch/x86/include/asm/doublefault.h new file mode 100644 index 000000000000..de0e88b32207 --- /dev/null +++ b/arch/x86/include/asm/doublefault.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_DOUBLEFAULT_H +#define _ASM_X86_DOUBLEFAULT_H + +#include <linux/linkage.h> + +#ifdef CONFIG_X86_32 +extern void doublefault_init_cpu_tss(void); +#else +static inline 
void doublefault_init_cpu_tss(void) +{ +} +#endif + +asmlinkage void __noreturn doublefault_shim(void); + +#endif /* _ASM_X86_DOUBLEFAULT_H */ diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index ae391f609840..302e11b15da8 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -2,19 +2,10 @@ #ifndef _ASM_X86_DWARF2_H #define _ASM_X86_DWARF2_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #warning "asm/dwarf2.h should be only included in pure assembly files" #endif -/* - * Macros for dwarf2 CFI unwind table entries. - * See "as.info" for details on these pseudo ops. Unfortunately - * they are only supported in very new binutils, so define them - * away for older version. - */ - -#ifdef CONFIG_AS_CFI - #define CFI_STARTPROC .cfi_startproc #define CFI_ENDPROC .cfi_endproc #define CFI_DEF_CFA .cfi_def_cfa @@ -30,20 +21,13 @@ #define CFI_UNDEFINED .cfi_undefined #define CFI_ESCAPE .cfi_escape -#ifdef CONFIG_AS_CFI_SIGNAL_FRAME -#define CFI_SIGNAL_FRAME .cfi_signal_frame -#else -#define CFI_SIGNAL_FRAME -#endif - -#if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__) #ifndef BUILD_VDSO /* * Emit CFI data in .debug_frame sections, not .eh_frame sections. * The latter we currently just discard since we don't do DWARF * unwinding at runtime. So only the offline DWARF information is - * useful to anyone. Note we should not use this directive if - * vmlinux.lds.S gets changed so it doesn't discard .eh_frame. + * useful to anyone. Note we should not use this directive if we + * ever decide to enable DWARF unwinding at runtime. */ .cfi_sections .debug_frame #else @@ -53,33 +37,5 @@ */ .cfi_sections .eh_frame, .debug_frame #endif -#endif - -#else - -/* - * Due to the structure of pre-exisiting code, don't use assembler line - * comment character # to ignore the arguments. Instead, use a dummy macro. 
- */ -.macro cfi_ignore a=0, b=0, c=0, d=0 -.endm - -#define CFI_STARTPROC cfi_ignore -#define CFI_ENDPROC cfi_ignore -#define CFI_DEF_CFA cfi_ignore -#define CFI_DEF_CFA_REGISTER cfi_ignore -#define CFI_DEF_CFA_OFFSET cfi_ignore -#define CFI_ADJUST_CFA_OFFSET cfi_ignore -#define CFI_OFFSET cfi_ignore -#define CFI_REL_OFFSET cfi_ignore -#define CFI_REGISTER cfi_ignore -#define CFI_RESTORE cfi_ignore -#define CFI_REMEMBER_STATE cfi_ignore -#define CFI_RESTORE_STATE cfi_ignore -#define CFI_UNDEFINED cfi_ignore -#define CFI_ESCAPE cfi_ignore -#define CFI_SIGNAL_FRAME cfi_ignore - -#endif #endif /* _ASM_X86_DWARF2_H */ diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h index 62be73b23d5c..c83645d5b2a8 100644 --- a/arch/x86/include/asm/e820/api.h +++ b/arch/x86/include/asm/e820/api.h @@ -10,12 +10,14 @@ extern struct e820_table *e820_table_firmware; extern unsigned long pci_mem_start; +extern bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type); extern bool e820__mapped_any(u64 start, u64 end, enum e820_type type); extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type); extern void e820__range_add (u64 start, u64 size, enum e820_type type); extern u64 e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type); extern u64 e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type); +extern u64 e820__range_update_table(struct e820_table *t, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type); extern void e820__print_table(char *who); extern int e820__update_table(struct e820_table *table); @@ -27,7 +29,6 @@ extern unsigned long e820__end_of_low_ram_pfn(void); extern u64 e820__memblock_alloc_reserved(u64 size, u64 align); extern void e820__memblock_setup(void); -extern void e820__reserve_setup_data(void); extern void e820__finish_early_params(void); extern void e820__reserve_resources(void); extern void e820__reserve_resources_late(void); diff --git a/arch/x86/include/asm/e820/types.h b/arch/x86/include/asm/e820/types.h index c3aa4b5e49e2..80c4a7266629 100644 --- a/arch/x86/include/asm/e820/types.h +++ b/arch/x86/include/asm/e820/types.h @@ -29,13 +29,12 @@ enum e820_type { E820_TYPE_PRAM = 12, /* - * Reserved RAM used by the kernel itself if - * CONFIG_INTEL_TXT=y is enabled, memory of this type - * will be included in the S3 integrity calculation - * and so should not include any memory that the BIOS - * might alter over the S3 transition: + * Special-purpose memory is indicated to the system via the + * EFI_MEMORY_SP attribute. Define an e820 translation of this + * memory type for the purpose of reserving this range and + * marking it with the IORES_DESC_SOFT_RESERVED designation. */ - E820_TYPE_RESERVED_KERN = 128, + E820_TYPE_SOFT_RESERVED = 0xefffffff, }; /* diff --git a/arch/x86/include/asm/edac.h b/arch/x86/include/asm/edac.h index 426fc53ff803..dfbd1ebb9f10 100644 --- a/arch/x86/include/asm/edac.h +++ b/arch/x86/include/asm/edac.h @@ -13,7 +13,7 @@ static inline void edac_atomic_scrub(void *va, u32 size) * are interrupt, DMA and SMP safe. 
*/ for (i = 0; i < size / 4; i++, virt_addr++) - asm volatile("lock; addl $0, %0"::"m" (*virt_addr)); + asm volatile("lock addl $0, %0"::"m" (*virt_addr)); } #endif /* _ASM_X86_EDAC_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 107283b1eb1e..f227a70ac91f 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -3,11 +3,17 @@ #define _ASM_X86_EFI_H #include <asm/fpu/api.h> -#include <asm/pgtable.h> #include <asm/processor-flags.h> #include <asm/tlb.h> #include <asm/nospec-branch.h> #include <asm/mmu_context.h> +#include <asm/ibt.h> +#include <linux/build_bug.h> +#include <linux/kernel.h> +#include <linux/pgtable.h> + +extern unsigned long efi_fw_vendor, efi_config_table; +extern unsigned long efi_mixed_mode_stack_pa; /* * We map the EFI regions needed for runtime services non-contiguously, @@ -18,92 +24,93 @@ * * This is the main reason why we're doing stable VA mappings for RT * services. - * - * This flag is used in conjunction with a chicken bit called - * "efi=old_map" which can be used as a fallback to the old runtime - * services mapping method in case there's some b0rkage with a - * particular EFI implementation (haha, it is hard to hold up the - * sarcasm here...). */ -#define EFI_OLD_MEMMAP EFI_ARCH_1 #define EFI32_LOADER_SIGNATURE "EL32" #define EFI64_LOADER_SIGNATURE "EL64" -#define MAX_CMDLINE_ADDRESS UINT_MAX - #define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF -#ifdef CONFIG_X86_32 - -extern asmlinkage unsigned long efi_call_phys(void *, ...); - -#define arch_efi_call_virt_setup() \ -({ \ - kernel_fpu_begin(); \ - firmware_restrict_branch_speculation_start(); \ -}) +#define EFI_UNACCEPTED_UNIT_SIZE PMD_SIZE -#define arch_efi_call_virt_teardown() \ -({ \ - firmware_restrict_branch_speculation_end(); \ - kernel_fpu_end(); \ -}) +/* + * The EFI services are called through variadic functions in many cases. These + * functions are implemented in assembler and support only a fixed number of + * arguments. The macros below allow us to check at build time that we don't + * try to call them with too many arguments. + * + * __efi_nargs() will return the number of arguments if it is 7 or less, and + * cause a BUILD_BUG otherwise. The limitations of the C preprocessor make it + * impossible to calculate the exact number of arguments beyond some + * pre-defined limit. The maximum number of arguments currently supported by + * any of the thunks is 7, so this is good enough for now and can be extended + * in the obvious way if we ever need more. + */ +#define __efi_nargs(...) __efi_nargs_(__VA_ARGS__) +#define __efi_nargs_(...) __efi_nargs__(0, ##__VA_ARGS__, \ + __efi_arg_sentinel(9), __efi_arg_sentinel(8), \ + __efi_arg_sentinel(7), __efi_arg_sentinel(6), \ + __efi_arg_sentinel(5), __efi_arg_sentinel(4), \ + __efi_arg_sentinel(3), __efi_arg_sentinel(2), \ + __efi_arg_sentinel(1), __efi_arg_sentinel(0)) +#define __efi_nargs__(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, n, ...) \ + __take_second_arg(n, \ + ({ BUILD_BUG_ON_MSG(1, "__efi_nargs limit exceeded"); 10; })) +#define __efi_arg_sentinel(n) , n /* - * Wrap all the virtual calls in a way that forces the parameters on the stack. + * __efi_nargs_check(f, n, ...) will cause a BUILD_BUG if the ellipsis + * represents more than n arguments. */ -#define arch_efi_call_virt(p, f, args...) \ -({ \ - ((efi_##f##_t __attribute__((regparm(0)))*) p->f)(args); \ + +#define __efi_nargs_check(f, n, ...)
\ + __efi_nargs_check_(f, __efi_nargs(__VA_ARGS__), n) +#define __efi_nargs_check_(f, p, n) __efi_nargs_check__(f, p, n) +#define __efi_nargs_check__(f, p, n) ({ \ + BUILD_BUG_ON_MSG( \ + (p) > (n), \ + #f " called with too many arguments (" #p ">" #n ")"); \ }) -#define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size) +static inline void efi_fpu_begin(void) +{ + /* + * The UEFI calling convention (UEFI spec 2.3.2 and 2.3.4) requires + * that FCW and MXCSR (64-bit) must be initialized prior to calling + * UEFI code. (Oddly the spec does not require that the FPU stack + * be empty.) + */ + kernel_fpu_begin_mask(KFPU_387 | KFPU_MXCSR); +} -#else /* !CONFIG_X86_32 */ +static inline void efi_fpu_end(void) +{ + kernel_fpu_end(); +} -#define EFI_LOADER_SIGNATURE "EL64" +#ifdef CONFIG_X86_32 +#define EFI_X86_KERNEL_ALLOC_LIMIT (SZ_512M - 1) +#else /* !CONFIG_X86_32 */ +#define EFI_X86_KERNEL_ALLOC_LIMIT EFI_ALLOC_LIMIT -extern asmlinkage u64 efi_call(void *fp, ...); +extern asmlinkage u64 __efi_call(void *fp, ...); -#define efi_call_phys(f, args...) efi_call((f), args) +extern bool efi_disable_ibt_for_runtime; -/* - * struct efi_scratch - Scratch space used while switching to/from efi_mm - * @phys_stack: stack used during EFI Mixed Mode - * @prev_mm: store/restore stolen mm_struct while switching to/from efi_mm - */ -struct efi_scratch { - u64 phys_stack; - struct mm_struct *prev_mm; -} __packed; - -#define arch_efi_call_virt_setup() \ -({ \ - efi_sync_low_kernel_mappings(); \ - kernel_fpu_begin(); \ - firmware_restrict_branch_speculation_start(); \ - \ - if (!efi_enabled(EFI_OLD_MEMMAP)) \ - efi_switch_mm(&efi_mm); \ +#define efi_call(...) ({ \ + __efi_nargs_check(efi_call, 7, __VA_ARGS__); \ + __efi_call(__VA_ARGS__); \ }) -#define arch_efi_call_virt(p, f, args...) \ - efi_call((void *)p->f, args) \ - -#define arch_efi_call_virt_teardown() \ -({ \ - if (!efi_enabled(EFI_OLD_MEMMAP)) \ - efi_switch_mm(efi_scratch.prev_mm); \ - \ - firmware_restrict_branch_speculation_end(); \ - kernel_fpu_end(); \ +#undef arch_efi_call_virt +#define arch_efi_call_virt(p, f, args...) ({ \ + u64 ret, ibt = ibt_save(efi_disable_ibt_for_runtime); \ + ret = efi_call((void *)p->f, args); \ + ibt_restore(ibt); \ + ret; \ }) -extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, - u32 type, u64 attribute); - #ifdef CONFIG_KASAN /* * CONFIG_KASAN may redefine memset to __memset. 
__memset function is present @@ -118,138 +125,306 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, #endif /* CONFIG_X86_32 */ -extern struct efi_scratch efi_scratch; -extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable); extern int __init efi_memblock_x86_reserve_range(void); -extern pgd_t * __init efi_call_phys_prolog(void); -extern void __init efi_call_phys_epilog(pgd_t *save_pgd); extern void __init efi_print_memmap(void); -extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); extern int __init efi_alloc_page_tables(void); extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); -extern void __init old_map_region(efi_memory_desc_t *md); -extern void __init runtime_code_page_mkexec(void); extern void __init efi_runtime_update_mappings(void); extern void __init efi_dump_pagetable(void); extern void __init efi_apply_memmap_quirks(void); extern int __init efi_reuse_config(u64 tables, int nr_tables); extern void efi_delete_dummy_variable(void); -extern void efi_switch_mm(struct mm_struct *mm); -extern void efi_recover_from_page_fault(unsigned long phys_addr); +extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr); extern void efi_free_boot_services(void); -extern void efi_reserve_boot_services(void); -struct efi_setup_data { - u64 fw_vendor; - u64 runtime; - u64 tables; - u64 smbios; - u64 reserved[8]; -}; +void arch_efi_call_virt_setup(void); +void arch_efi_call_virt_teardown(void); extern u64 efi_setup; #ifdef CONFIG_EFI +extern u64 __efi64_thunk(u32, ...); -static inline bool efi_is_native(void) +#define efi64_thunk(...) 
({ \ + u64 __pad[3]; /* must have space for 3 args on the stack */ \ + __efi_nargs_check(efi64_thunk, 9, __VA_ARGS__); \ + __efi64_thunk(__VA_ARGS__, __pad); \ +}) + +static inline bool efi_is_mixed(void) { - return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT); + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return false; + return IS_ENABLED(CONFIG_X86_64) && !efi_enabled(EFI_64BIT); } static inline bool efi_runtime_supported(void) { - if (efi_is_native()) + if (IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT)) return true; - if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_enabled(EFI_OLD_MEMMAP)) - return true; - - return false; + return IS_ENABLED(CONFIG_EFI_MIXED); } -extern struct console early_efi_console; extern void parse_efi_setup(u64 phys_addr, u32 data_len); -extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); +extern void efi_thunk_runtime_setup(void); +efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map, + unsigned long systab_phys); + +/* arch specific definitions used by the stub code */ #ifdef CONFIG_EFI_MIXED -extern void efi_thunk_runtime_setup(void); -extern efi_status_t efi_thunk_set_virtual_address_map( - void *phys_set_virtual_address_map, - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map); -#else -static inline void efi_thunk_runtime_setup(void) {} -static inline efi_status_t efi_thunk_set_virtual_address_map( - void *phys_set_virtual_address_map, - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) + +#define EFI_ALLOC_LIMIT (efi_is_64bit() ? ULONG_MAX : U32_MAX) + +#define ARCH_HAS_EFISTUB_WRAPPERS + +static inline bool efi_is_64bit(void) { - return EFI_SUCCESS; + extern const bool efi_is64; + + return efi_is64; } -#endif /* CONFIG_EFI_MIXED */ +static inline bool efi_is_native(void) +{ + return efi_is_64bit(); +} -/* arch specific definitions used by the stub code */ +#define efi_table_attr(inst, attr) \ + (efi_is_native() ? (inst)->attr \ + : efi_mixed_table_attr((inst), attr)) -struct efi_config { - u64 image_handle; - u64 table; - u64 runtime_services; - u64 boot_services; - u64 text_output; - efi_status_t (*call)(unsigned long, ...); - bool is64; -} __packed; +#define efi_mixed_table_attr(inst, attr) \ + (__typeof__(inst->attr)) \ + _Generic(inst->mixed_mode.attr, \ + u32: (unsigned long)(inst->mixed_mode.attr), \ + default: (inst->mixed_mode.attr)) -__pure const struct efi_config *__efi_early(void); +/* + * The following macros allow translating arguments if necessary from native to + * mixed mode. The use case for this is to initialize the upper 32 bits of + * output parameters, and where the 32-bit method requires a 64-bit argument, + * which must be split up into two arguments to be thunked properly. + * + * As examples, the AllocatePool boot service returns the address of the + * allocation, but it will not set the high 32 bits of the address. To ensure + * that the full 64-bit address is initialized, we zero-init the address before + * calling the thunk. + * + * The FreePages boot service takes a 64-bit physical address even in 32-bit + * mode. 
For the thunk to work correctly, a native 64-bit call of + * free_pages(addr, size) + * must be translated to + * efi64_thunk(free_pages, addr & U32_MAX, addr >> 32, size) + * so that the two 32-bit halves of addr get pushed onto the stack separately. + */ -static inline bool efi_is_64bit(void) +static inline void *efi64_zero_upper(void *p) { - if (!IS_ENABLED(CONFIG_X86_64)) - return false; + if (p) + ((u32 *)p)[1] = 0; + return p; +} - if (!IS_ENABLED(CONFIG_EFI_MIXED)) - return true; +static inline u32 efi64_convert_status(efi_status_t status) +{ + return (u32)(status | (u64)status >> 32); +} - return __efi_early()->is64; +#define __efi64_split(val) (val) & U32_MAX, (u64)(val) >> 32 + +#define __efi64_argmap_free_pages(addr, size) \ + ((addr), 0, (size)) + +#define __efi64_argmap_get_memory_map(mm_size, mm, key, size, ver) \ + ((mm_size), (mm), efi64_zero_upper(key), efi64_zero_upper(size), (ver)) + +#define __efi64_argmap_allocate_pool(type, size, buffer) \ + ((type), (size), efi64_zero_upper(buffer)) + +#define __efi64_argmap_locate_handle_buffer(type, proto, key, num, buf) \ + ((type), (proto), (key), efi64_zero_upper(num), efi64_zero_upper(buf)) + +#define __efi64_argmap_create_event(type, tpl, f, c, event) \ + ((type), (tpl), (f), (c), efi64_zero_upper(event)) + +#define __efi64_argmap_set_timer(event, type, time) \ + ((event), (type), lower_32_bits(time), upper_32_bits(time)) + +#define __efi64_argmap_wait_for_event(num, event, index) \ + ((num), (event), efi64_zero_upper(index)) + +#define __efi64_argmap_handle_protocol(handle, protocol, interface) \ + ((handle), (protocol), efi64_zero_upper(interface)) + +#define __efi64_argmap_locate_protocol(protocol, reg, interface) \ + ((protocol), (reg), efi64_zero_upper(interface)) + +#define __efi64_argmap_locate_device_path(protocol, path, handle) \ + ((protocol), (path), efi64_zero_upper(handle)) + +#define __efi64_argmap_exit(handle, status, size, data) \ + ((handle), efi64_convert_status(status), (size), (data)) + +/* PCI I/O */ +#define __efi64_argmap_get_location(protocol, seg, bus, dev, func) \ + ((protocol), efi64_zero_upper(seg), efi64_zero_upper(bus), \ + efi64_zero_upper(dev), efi64_zero_upper(func)) + +/* LoadFile */ +#define __efi64_argmap_load_file(protocol, path, policy, bufsize, buf) \ + ((protocol), (path), (policy), efi64_zero_upper(bufsize), (buf)) + +/* Graphics Output Protocol */ +#define __efi64_argmap_query_mode(gop, mode, size, info) \ + ((gop), (mode), efi64_zero_upper(size), efi64_zero_upper(info)) + +/* TCG2 protocol */ +#define __efi64_argmap_hash_log_extend_event(prot, fl, addr, size, ev) \ + ((prot), (fl), 0ULL, (u64)(addr), 0ULL, (u64)(size), 0ULL, ev) + +/* DXE services */ +#define __efi64_argmap_get_memory_space_descriptor(phys, desc) \ + (__efi64_split(phys), (desc)) + +#define __efi64_argmap_set_memory_space_attributes(phys, size, flags) \ + (__efi64_split(phys), __efi64_split(size), __efi64_split(flags)) + +/* file protocol */ +#define __efi64_argmap_open(prot, newh, fname, mode, attr) \ + ((prot), efi64_zero_upper(newh), (fname), __efi64_split(mode), \ + __efi64_split(attr)) + +#define __efi64_argmap_set_position(pos) (__efi64_split(pos)) + +/* file system protocol */ +#define __efi64_argmap_open_volume(prot, file) \ + ((prot), efi64_zero_upper(file)) + +/* Memory Attribute Protocol */ +#define __efi64_argmap_get_memory_attributes(protocol, phys, size, flags) \ + ((protocol), __efi64_split(phys), __efi64_split(size), (flags)) + +#define __efi64_argmap_set_memory_attributes(protocol, phys, size, flags) 
\ + ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags)) + +#define __efi64_argmap_clear_memory_attributes(protocol, phys, size, flags) \ + ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags)) + +/* EFI SMBIOS protocol */ +#define __efi64_argmap_get_next(protocol, smbioshandle, type, record, phandle) \ + ((protocol), (smbioshandle), (type), efi64_zero_upper(record), \ + efi64_zero_upper(phandle)) +/* + * The macros below handle the plumbing for the argument mapping. To add a + * mapping for a specific EFI method, simply define a macro + * __efi64_argmap_<method name>, following the examples above. + */ + +#define __efi64_thunk_map(inst, func, ...) \ + efi64_thunk(inst->mixed_mode.func, \ + __efi64_argmap(__efi64_argmap_ ## func(__VA_ARGS__), \ + (__VA_ARGS__))) + +#define __efi64_argmap(mapped, args) \ + __PASTE(__efi64_argmap__, __efi_nargs(__efi_eat mapped))(mapped, args) +#define __efi64_argmap__0(mapped, args) __efi_eval mapped +#define __efi64_argmap__1(mapped, args) __efi_eval args + +#define __efi_eat(...) +#define __efi_eval(...) __VA_ARGS__ + +static inline efi_status_t __efi64_widen_efi_status(u64 status) +{ + /* use rotate to move the value of bit #31 into position #63 */ + return ror64(rol32(status, 1), 1); } -#define efi_table_attr(table, attr, instance) \ - (efi_is_64bit() ? \ - ((table##_64_t *)(unsigned long)instance)->attr : \ - ((table##_32_t *)(unsigned long)instance)->attr) +/* The macro below handles dispatching via the thunk if needed */ -#define efi_call_proto(protocol, f, instance, ...) \ - __efi_early()->call(efi_table_attr(protocol, f, instance), \ - instance, ##__VA_ARGS__) +#define efi_fn_call(inst, func, ...) \ + (efi_is_native() ? (inst)->func(__VA_ARGS__) \ + : efi_mixed_call((inst), func, ##__VA_ARGS__)) -#define efi_call_early(f, ...) \ - __efi_early()->call(efi_table_attr(efi_boot_services, f, \ - __efi_early()->boot_services), __VA_ARGS__) +#define efi_mixed_call(inst, func, ...) \ + _Generic(inst->func(__VA_ARGS__), \ + efi_status_t: \ + __efi64_widen_efi_status( \ + __efi64_thunk_map(inst, func, ##__VA_ARGS__)), \ + u64: ({ BUILD_BUG(); ULONG_MAX; }), \ + default: \ + (__typeof__(inst->func(__VA_ARGS__))) \ + __efi64_thunk_map(inst, func, ##__VA_ARGS__)) -#define __efi_call_early(f, ...) \ - __efi_early()->call((unsigned long)f, __VA_ARGS__); +#else /* CONFIG_EFI_MIXED */ + +static inline bool efi_is_64bit(void) +{ + return IS_ENABLED(CONFIG_X86_64); +} -#define efi_call_runtime(f, ...) 
\ - __efi_early()->call(efi_table_attr(efi_runtime_services, f, \ - __efi_early()->runtime_services), __VA_ARGS__) +#endif /* CONFIG_EFI_MIXED */ extern bool efi_reboot_required(void); +extern bool efi_is_table_address(unsigned long phys_addr); +extern void efi_reserve_boot_services(void); #else static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {} static inline bool efi_reboot_required(void) { return false; } +static inline bool efi_is_table_address(unsigned long phys_addr) +{ + return false; +} +static inline void efi_reserve_boot_services(void) +{ +} #endif /* CONFIG_EFI */ +extern int __init efi_memmap_alloc(unsigned int num_entries, + struct efi_memory_map_data *data); + +extern int __init efi_memmap_install(struct efi_memory_map_data *data); +extern int __init efi_memmap_split_count(efi_memory_desc_t *md, + struct range *range); +extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap, + void *buf, struct efi_mem_range *mem); + +extern enum efi_secureboot_mode __x86_ima_efi_boot_mode(void); + +#define arch_ima_efi_boot_mode __x86_ima_efi_boot_mode() + +#ifdef CONFIG_EFI_RUNTIME_MAP +int efi_get_runtime_map_size(void); +int efi_get_runtime_map_desc_size(void); +int efi_runtime_map_copy(void *buf, size_t bufsz); +#else +static inline int efi_get_runtime_map_size(void) +{ + return 0; +} + +static inline int efi_get_runtime_map_desc_size(void) +{ + return 0; +} + +static inline int efi_runtime_map_copy(void *buf, size_t bufsz) +{ + return 0; +} + +#endif + #endif /* _ASM_X86_EFI_H */ diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 69c0f892e310..6c8fdc96be7e 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -7,6 +7,7 @@ */ #include <linux/thread_info.h> +#include <asm/ia32.h> #include <asm/ptrace.h> #include <asm/user.h> #include <asm/auxvec.h> @@ -21,8 +22,6 @@ typedef struct user_i387_struct elf_fpregset_t; #ifdef __i386__ -typedef struct user_fxsr_struct elf_fpxregset_t; - #define R_386_NONE 0 #define R_386_32 1 #define R_386_PC32 2 @@ -55,8 +54,9 @@ typedef struct user_fxsr_struct elf_fpxregset_t; #define R_X86_64_GLOB_DAT 6 /* Create GOT entry */ #define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */ #define R_X86_64_RELATIVE 8 /* Adjust by program base */ -#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative - offset to GOT */ +#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative offset to GOT */ +#define R_X86_64_GOTPCRELX 41 +#define R_X86_64_REX_GOTPCRELX 42 #define R_X86_64_32 10 /* Direct 32 bit zero extended */ #define R_X86_64_32S 11 /* Direct 32 bit sign extended */ #define R_X86_64_16 12 /* Direct 16 bit zero extended */ @@ -76,12 +76,8 @@ typedef struct user_fxsr_struct elf_fpxregset_t; #include <asm/vdso.h> -#ifdef CONFIG_X86_64 extern unsigned int vdso64_enabled; -#endif -#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) extern unsigned int vdso32_enabled; -#endif /* * This is used to ensure we don't load something for the wrong architecture. 
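/*
 * An illustrative aside, not part of the patch itself: the R_X86_64_GOTPCRELX
 * and R_X86_64_REX_GOTPCRELX relocation numbers added above exist so the
 * linker may relax a GOT-indirect load per the x86-64 psABI, e.g.
 *
 *	movq	foo@GOTPCREL(%rip), %rax	# R_X86_64_REX_GOTPCRELX
 *
 * can be rewritten into "leaq foo(%rip), %rax" when foo is known to bind
 * locally.
 */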
@@ -118,7 +114,7 @@ extern unsigned int vdso32_enabled;
  * now struct_user_regs, they are different)
  */
 
-#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs)	\
+#define ELF_CORE_COPY_REGS(pr_reg, regs)	\
 do {						\
 	pr_reg[0] = regs->bx;			\
 	pr_reg[1] = regs->cx;			\
@@ -130,6 +126,7 @@ do {						\
 	pr_reg[7] = regs->ds;			\
 	pr_reg[8] = regs->es;			\
 	pr_reg[9] = regs->fs;			\
+	savesegment(gs, pr_reg[10]);		\
 	pr_reg[11] = regs->orig_ax;		\
 	pr_reg[12] = regs->ip;			\
 	pr_reg[13] = regs->cs;			\
@@ -138,18 +135,6 @@ do {						\
 	pr_reg[16] = regs->ss;			\
 } while (0);
 
-#define ELF_CORE_COPY_REGS(pr_reg, regs)		\
-do {						\
-	ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
-	pr_reg[10] = get_user_gs(regs);		\
-} while (0);
-
-#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs)	\
-do {						\
-	ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
-	savesegment(gs, pr_reg[10]);		\
-} while (0);
-
 #define ELF_PLATFORM	(utsname()->machine)
 #define set_personality_64bit()	do { } while (0)
 
@@ -162,13 +147,9 @@ do {						\
 	((x)->e_machine == EM_X86_64)
 
 #define compat_elf_check_arch(x)					\
-	(elf_check_arch_ia32(x) ||					\
+	((elf_check_arch_ia32(x) && ia32_enabled_verbose()) ||		\
 	 (IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64))
 
-#if __USER32_DS != __USER_DS
-# error "The following code assumes __USER32_DS == __USER_DS"
-#endif
-
 static inline void elf_common_init(struct thread_struct *t,
 				   struct pt_regs *regs, const u16 ds)
 {
@@ -188,8 +169,9 @@ static inline void elf_common_init(struct thread_struct *t,
 #define COMPAT_ELF_PLAT_INIT(regs, load_addr)		\
 	elf_common_init(&current->thread, regs, __USER_DS)
 
-void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp);
-#define compat_start_thread compat_start_thread
+void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp, bool x32);
+#define COMPAT_START_THREAD(ex, regs, new_ip, new_sp)	\
+	compat_start_thread(regs, new_ip, new_sp, ex->e_machine == EM_X86_64)
 
 void set_personality_ia32(bool);
 #define COMPAT_SET_PERSONALITY(ex)			\
@@ -238,7 +220,6 @@ do {								\
 /* I'm not sure if we can use '-' here */
 #define ELF_PLATFORM       ("x86_64")
 extern void set_personality_64bit(void);
-extern unsigned int sysctl_vsyscall32;
 extern int force_personality32;
 
 #endif /* !CONFIG_X86_32 */
 
@@ -281,9 +262,29 @@ extern u32 elf_hwcap2;
 /*
  * An executable for which elf_read_implies_exec() returns TRUE will
  * have the READ_IMPLIES_EXEC personality flag set automatically.
+ *
+ * The decision process for determining the results is:
+ *
+ *                 CPU: | lacks NX*  | has NX, ia32     | has NX, x86_64 |
+ * ELF:                 |            |                  |                |
+ * ---------------------|------------|------------------|----------------|
+ * missing PT_GNU_STACK | exec-all   | exec-all         | exec-none      |
+ * PT_GNU_STACK == RWX  | exec-stack | exec-stack       | exec-stack     |
+ * PT_GNU_STACK == RW   | exec-none  | exec-none        | exec-none      |
+ *
+ *  exec-all  : all PROT_READ user mappings are executable, except when
+ *              backed by files on a noexec-filesystem.
+ *  exec-none : only PROT_EXEC user mappings are executable.
+ *  exec-stack: only the stack and PROT_EXEC user mappings are executable.
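+ *
+ * A worked example (illustrative): a 32-bit binary that lacks
+ * PT_GNU_STACK, run on NX-capable hardware, falls into the
+ * "has NX, ia32" column, first row, and therefore gets exec-all.
+ * That is exactly what the elf_read_implies_exec() definition below
+ * computes from mmap_is_ia32() && executable_stack == EXSTACK_DEFAULT.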
+ * + * *this column has no architectural effect: NX markings are ignored by + * hardware, but may have behavioral effects when "wants X" collides with + * "cannot be X" constraints in memory permission flags, as in + * https://lkml.kernel.org/r/20190418055759.GA3155@mellanox.com + * */ #define elf_read_implies_exec(ex, executable_stack) \ - (executable_stack != EXSTACK_DISABLE_X) + (mmap_is_ia32() && executable_stack == EXSTACK_DEFAULT) struct task_struct; @@ -293,6 +294,7 @@ do { \ NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ } \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \ } while (0) /* @@ -309,6 +311,7 @@ extern unsigned long task_size_32bit(void); extern unsigned long task_size_64bit(int full_addr_space); extern unsigned long get_mmap_base(int is_legacy); extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len); +extern unsigned long get_sigframe_size(void); #ifdef CONFIG_X86_32 @@ -330,6 +333,7 @@ do { \ if (vdso64_enabled) \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ (unsigned long __force)current->mm->context.vdso); \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \ } while (0) /* As a historical oddity, the x32 and x86_64 vDSOs are controlled together. */ @@ -338,14 +342,15 @@ do { \ if (vdso64_enabled) \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ (unsigned long __force)current->mm->context.vdso); \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \ } while (0) #define AT_SYSINFO 32 #define COMPAT_ARCH_DLINFO \ -if (test_thread_flag(TIF_X32)) \ +if (exec->e_machine == EM_X86_64) \ ARCH_DLINFO_X32; \ -else \ +else if (IS_ENABLED(CONFIG_IA32_EMULATION)) \ ARCH_DLINFO_IA32 #define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) @@ -364,8 +369,12 @@ struct linux_binprm; extern int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp); extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm, - int uses_interp); -#define compat_arch_setup_additional_pages compat_arch_setup_additional_pages + int uses_interp, bool x32); +#define COMPAT_ARCH_SETUP_ADDITIONAL_PAGES(bprm, ex, interpreter) \ + compat_arch_setup_additional_pages(bprm, interpreter, \ + (ex->e_machine == EM_X86_64)) + +extern bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs); /* Do not change the values. See get_align_mask() */ enum align_flags { @@ -380,5 +389,4 @@ struct va_alignment { } ____cacheline_aligned; extern struct va_alignment va_align; -extern unsigned long align_vdso_addr(unsigned long); #endif /* _ASM_X86_ELF_H */ diff --git a/arch/x86/include/asm/elfcore-compat.h b/arch/x86/include/asm/elfcore-compat.h new file mode 100644 index 000000000000..f1b6c7a8d8fc --- /dev/null +++ b/arch/x86/include/asm/elfcore-compat.h @@ -0,0 +1,31 @@ +#ifndef _ASM_X86_ELFCORE_COMPAT_H +#define _ASM_X86_ELFCORE_COMPAT_H + +#include <asm/user32.h> + +/* + * On amd64 we have two 32bit ABIs - i386 and x32. The latter + * has bigger registers, so we use it for compat_elf_regset_t. + * The former uses i386_elf_prstatus and PRSTATUS_SIZE/SET_PR_FPVALID + * are used to choose the size and location of ->pr_fpvalid of + * the layout actually used. + */ +typedef struct user_regs_struct compat_elf_gregset_t; + +struct i386_elf_prstatus +{ + struct compat_elf_prstatus_common common; + struct user_regs_struct32 pr_reg; + compat_int_t pr_fpvalid; +}; + +#define PRSTATUS_SIZE \ + (user_64bit_mode(task_pt_regs(current)) \ + ? 
sizeof(struct compat_elf_prstatus) \ + : sizeof(struct i386_elf_prstatus)) +#define SET_PR_FPVALID(S) \ + (*(user_64bit_mode(task_pt_regs(current)) \ + ? &(S)->pr_fpvalid \ + : &((struct i386_elf_prstatus *)(S))->pr_fpvalid) = 1) + +#endif diff --git a/arch/x86/include/asm/emulate_prefix.h b/arch/x86/include/asm/emulate_prefix.h new file mode 100644 index 000000000000..70f5b98a5286 --- /dev/null +++ b/arch/x86/include/asm/emulate_prefix.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_EMULATE_PREFIX_H +#define _ASM_X86_EMULATE_PREFIX_H + +/* + * Virt escape sequences to trigger instruction emulation; + * ideally these would decode to 'whole' instruction and not destroy + * the instruction stream; sadly this is not true for the 'kvm' one :/ + */ + +#define __XEN_EMULATE_PREFIX 0x0f,0x0b,0x78,0x65,0x6e /* ud2 ; .ascii "xen" */ +#define __KVM_EMULATE_PREFIX 0x0f,0x0b,0x6b,0x76,0x6d /* ud2 ; .ascii "kvm" */ + +#endif diff --git a/arch/x86/include/asm/enclu.h b/arch/x86/include/asm/enclu.h new file mode 100644 index 000000000000..b1314e41a744 --- /dev/null +++ b/arch/x86/include/asm/enclu.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_ENCLU_H +#define _ASM_X86_ENCLU_H + +#define EENTER 0x02 +#define ERESUME 0x03 +#define EEXIT 0x04 + +#endif /* _ASM_X86_ENCLU_H */ diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h new file mode 100644 index 000000000000..ce3eb6d5fdf9 --- /dev/null +++ b/arch/x86/include/asm/entry-common.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_X86_ENTRY_COMMON_H +#define _ASM_X86_ENTRY_COMMON_H + +#include <linux/randomize_kstack.h> +#include <linux/user-return-notifier.h> + +#include <asm/nospec-branch.h> +#include <asm/io_bitmap.h> +#include <asm/fpu/api.h> +#include <asm/fred.h> + +/* Check that the stack and regs on entry from user mode are sane. */ +static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) { + /* + * Make sure that the entry code gave us a sensible EFLAGS + * register. Native because we want to check the actual CPU + * state, not the interrupt state as imagined by Xen. + */ + unsigned long flags = native_save_fl(); + unsigned long mask = X86_EFLAGS_DF | X86_EFLAGS_NT; + + /* + * For !SMAP hardware we patch out CLAC on entry. + */ + if (cpu_feature_enabled(X86_FEATURE_SMAP) || + cpu_feature_enabled(X86_FEATURE_XENPV)) + mask |= X86_EFLAGS_AC; + + WARN_ON_ONCE(flags & mask); + + /* We think we came from user mode. Make sure pt_regs agrees. */ + WARN_ON_ONCE(!user_mode(regs)); + + /* + * All entries from user mode (except #DF) should be on the + * normal thread stack and should have user pt_regs in the + * correct location. + */ + WARN_ON_ONCE(!on_thread_stack()); + WARN_ON_ONCE(regs != task_pt_regs(current)); + } +} +#define arch_enter_from_user_mode arch_enter_from_user_mode + +static inline void arch_exit_work(unsigned long ti_work) +{ + if (ti_work & _TIF_USER_RETURN_NOTIFY) + fire_user_return_notifiers(); + + if (unlikely(ti_work & _TIF_IO_BITMAP)) + tss_update_io_bitmap(); + + if (unlikely(ti_work & _TIF_NEED_FPU_LOAD)) + switch_fpu_return(); +} + +static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, + unsigned long ti_work) +{ + fpregs_assert_state_consistent(); + + if (unlikely(ti_work)) + arch_exit_work(ti_work); + + fred_update_rsp0(); + +#ifdef CONFIG_COMPAT + /* + * Compat syscalls set TS_COMPAT. 
Make sure we clear it before + * returning to user mode. We need to clear it *after* signal + * handling, because syscall restart has a fixup for compat + * syscalls. The fixup is exercised by the ptrace_syscall_32 + * selftest. + * + * We also need to clear TS_REGS_POKED_I386: the 32-bit tracer + * special case only applies after poking regs and before the + * very next return to user mode. + */ + current_thread_info()->status &= ~(TS_COMPAT | TS_I386_REGS_POKED); +#endif + + /* + * This value will get limited by KSTACK_OFFSET_MAX(), which is 10 + * bits. The actual entropy will be further reduced by the compiler + * when applying stack alignment constraints (see cc_stack_align4/8 in + * arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32) + * low bits from any entropy chosen here. + * + * Therefore, final stack offset entropy will be 7 (x86_64) or + * 8 (ia32) bits. + */ + choose_random_kstack_offset(rdtsc()); + + /* Avoid unnecessary reads of 'x86_ibpb_exit_to_user' */ + if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER) && + this_cpu_read(x86_ibpb_exit_to_user)) { + indirect_branch_prediction_barrier(); + this_cpu_write(x86_ibpb_exit_to_user, false); + } +} +#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare + +static __always_inline void arch_exit_to_user_mode(void) +{ + amd_clear_divider(); +} +#define arch_exit_to_user_mode arch_exit_to_user_mode + +#endif diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h deleted file mode 100644 index 416422762845..000000000000 --- a/arch/x86/include/asm/entry_arch.h +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This file is designed to contain the BUILD_INTERRUPT specifications for - * all of the extra named interrupt vectors used by the architecture. - * Usually this is the Inter Process Interrupts (IPIs) - */ - -/* - * The following vectors are part of the Linux architecture, there - * is no hardware IRQ pin equivalent for them, they are triggered - * through the ICC by us (IPIs) - */ -#ifdef CONFIG_SMP -BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) -BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) -BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) -BUILD_INTERRUPT(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR) -BUILD_INTERRUPT(reboot_interrupt, REBOOT_VECTOR) -#endif - -#ifdef CONFIG_HAVE_KVM -BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR) -BUILD_INTERRUPT(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR) -BUILD_INTERRUPT(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR) -#endif - -/* - * every pentium local APIC has two 'local interrupts', with a - * soft-definable vector attached to both interrupts, one of - * which is a timer interrupt, the other one is error counter - * overflow. 
Linux uses the local APIC timer interrupt to get - * a much simpler SMP time architecture: - */ -#ifdef CONFIG_X86_LOCAL_APIC - -BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) -BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) -BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) -BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) - -#ifdef CONFIG_IRQ_WORK -BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR) -#endif - -#ifdef CONFIG_X86_THERMAL_VECTOR -BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) -#endif - -#ifdef CONFIG_X86_MCE_THRESHOLD -BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) -#endif - -#ifdef CONFIG_X86_MCE_AMD -BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR) -#endif -#endif diff --git a/arch/x86/include/asm/error-injection.h b/arch/x86/include/asm/error-injection.h deleted file mode 100644 index 47b7a1296245..000000000000 --- a/arch/x86/include/asm/error-injection.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_ERROR_INJECTION_H -#define _ASM_ERROR_INJECTION_H - -#include <linux/compiler.h> -#include <linux/linkage.h> -#include <asm/ptrace.h> -#include <asm-generic/error-injection.h> - -asmlinkage void just_return_func(void); -void override_function_with_return(struct pt_regs *regs); - -#endif /* _ASM_ERROR_INJECTION_H */ diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h index d8c2198d543b..a0e0c6b50155 100644 --- a/arch/x86/include/asm/extable.h +++ b/arch/x86/include/asm/extable.h @@ -1,12 +1,18 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_EXTABLE_H #define _ASM_X86_EXTABLE_H + +#include <asm/extable_fixup_types.h> + /* - * The exception table consists of triples of addresses relative to the - * exception table entry itself. The first address is of an instruction - * that is allowed to fault, the second is the target at which the program - * should continue. The third is a handler function to deal with the fault - * caused by the instruction in the first field. + * The exception table consists of two addresses relative to the + * exception table entry itself and a type selector field. + * + * The first address is of an instruction that is allowed to fault, the + * second is the target at which the program should continue. + * + * The type entry is used by fixup_exception() to select the handler to + * deal with the fault caused by the instruction in the first field. * * All the routines below use bits of fixup code that are out of line * with the main instruction path. 
This means when everything is well, @@ -15,7 +21,7 @@ */ struct exception_table_entry { - int insn, fixup, handler; + int insn, fixup, data; }; struct pt_regs; @@ -25,14 +31,30 @@ struct pt_regs; do { \ (a)->fixup = (b)->fixup + (delta); \ (b)->fixup = (tmp).fixup - (delta); \ - (a)->handler = (b)->handler + (delta); \ - (b)->handler = (tmp).handler - (delta); \ + (a)->data = (b)->data; \ + (b)->data = (tmp).data; \ } while (0) extern int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr); -extern int fixup_bug(struct pt_regs *regs, int trapnr); -extern bool ex_has_fault_handler(unsigned long ip); +extern int ex_get_fixup_type(unsigned long ip); extern void early_fixup_exception(struct pt_regs *regs, int trapnr); +#ifdef CONFIG_X86_MCE +extern void __noreturn ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr); +#else +static inline void __noreturn ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) +{ + for (;;) + cpu_relax(); +} +#endif + +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_X86_64) +bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs); +#else +static inline bool ex_handler_bpf(const struct exception_table_entry *x, + struct pt_regs *regs) { return false; } +#endif + #endif diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h new file mode 100644 index 000000000000..906b0d5541e8 --- /dev/null +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_EXTABLE_FIXUP_TYPES_H +#define _ASM_X86_EXTABLE_FIXUP_TYPES_H + +/* + * Our IMM is signed, as such it must live at the top end of the word. Also, + * since C99 hex constants are of ambiguous type, force cast the mask to 'int' + * so that FIELD_GET() will DTRT and sign extend the value when it extracts it. 
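+ *
+ * A minimal decode sketch (assumes <linux/bitfield.h>; "e" stands for a
+ * hypothetical struct exception_table_entry pointer):
+ *
+ *	int type = FIELD_GET(EX_DATA_TYPE_MASK, e->data);
+ *	int imm  = FIELD_GET(EX_DATA_IMM_MASK, e->data);
+ *
+ * where "imm" comes back sign-extended because of the 'int' casts below.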
+ */ +#define EX_DATA_TYPE_MASK ((int)0x000000FF) +#define EX_DATA_REG_MASK ((int)0x00000F00) +#define EX_DATA_FLAG_MASK ((int)0x0000F000) +#define EX_DATA_IMM_MASK ((int)0xFFFF0000) + +#define EX_DATA_REG_SHIFT 8 +#define EX_DATA_FLAG_SHIFT 12 +#define EX_DATA_IMM_SHIFT 16 + +#define EX_DATA_REG(reg) ((reg) << EX_DATA_REG_SHIFT) +#define EX_DATA_FLAG(flag) ((flag) << EX_DATA_FLAG_SHIFT) +#define EX_DATA_IMM(imm) ((imm) << EX_DATA_IMM_SHIFT) + +/* segment regs */ +#define EX_REG_DS EX_DATA_REG(8) +#define EX_REG_ES EX_DATA_REG(9) +#define EX_REG_FS EX_DATA_REG(10) +#define EX_REG_GS EX_DATA_REG(11) + +/* flags */ +#define EX_FLAG_CLEAR_AX EX_DATA_FLAG(1) +#define EX_FLAG_CLEAR_DX EX_DATA_FLAG(2) +#define EX_FLAG_CLEAR_AX_DX EX_DATA_FLAG(3) + +/* types */ +#define EX_TYPE_NONE 0 +#define EX_TYPE_DEFAULT 1 +#define EX_TYPE_FAULT 2 +#define EX_TYPE_UACCESS 3 +/* unused, was: #define EX_TYPE_COPY 4 */ +#define EX_TYPE_CLEAR_FS 5 +#define EX_TYPE_FPU_RESTORE 6 +#define EX_TYPE_BPF 7 +#define EX_TYPE_WRMSR 8 +#define EX_TYPE_RDMSR 9 +#define EX_TYPE_WRMSR_SAFE 10 /* reg := -EIO */ +#define EX_TYPE_RDMSR_SAFE 11 /* reg := -EIO */ +#define EX_TYPE_WRMSR_IN_MCE 12 +#define EX_TYPE_RDMSR_IN_MCE 13 +#define EX_TYPE_DEFAULT_MCE_SAFE 14 +#define EX_TYPE_FAULT_MCE_SAFE 15 + +#define EX_TYPE_POP_REG 16 /* sp += sizeof(long) */ +#define EX_TYPE_POP_ZERO (EX_TYPE_POP_REG | EX_DATA_IMM(0)) + +#define EX_TYPE_IMM_REG 17 /* reg := (long)imm */ +#define EX_TYPE_EFAULT_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(-EFAULT)) +#define EX_TYPE_ZERO_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(0)) +#define EX_TYPE_ONE_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(1)) + +#define EX_TYPE_FAULT_SGX 18 + +#define EX_TYPE_UCOPY_LEN 19 /* cx := reg + imm*cx */ +#define EX_TYPE_UCOPY_LEN1 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(1)) +#define EX_TYPE_UCOPY_LEN4 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(4)) +#define EX_TYPE_UCOPY_LEN8 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(8)) + +#define EX_TYPE_ZEROPAD 20 /* longword load with zeropad on fault */ + +#define EX_TYPE_ERETU 21 + +#endif diff --git a/arch/x86/include/asm/fb.h b/arch/x86/include/asm/fb.h deleted file mode 100644 index ab4c960146e3..000000000000 --- a/arch/x86/include/asm/fb.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_FB_H -#define _ASM_X86_FB_H - -#include <linux/fb.h> -#include <linux/fs.h> -#include <asm/page.h> - -static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, - unsigned long off) -{ - unsigned long prot; - - prot = pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK; - if (boot_cpu_data.x86 > 3) - pgprot_val(vma->vm_page_prot) = - prot | cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS); -} - -extern int fb_is_primary_device(struct fb_info *info); - -#endif /* _ASM_X86_FB_H */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 50ba74a34a37..4519c9f35ba0 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -14,24 +14,30 @@ #ifndef _ASM_X86_FIXMAP_H #define _ASM_X86_FIXMAP_H +#include <asm/kmap_size.h> + /* * Exposed to assembly code for setting up initial page tables. Cannot be * calculated in assembly code (fixmap entries are an enum), but is sanity * checked in the actual fixmap C code to make sure that the fixmap is * covered fully. 
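 *
 * (Illustrative arithmetic for the CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP case
 * below: one PMD covers 512 ptes, so KM_PMDS reserves KM_MAX_IDX PMD
 * entries per group of 512 possible CPUs, rounded up.)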
*/ -#define FIXMAP_PMD_NUM 2 +#ifndef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP +# define FIXMAP_PMD_NUM 2 +#else +# define KM_PMDS (KM_MAX_IDX * ((CONFIG_NR_CPUS + 511) / 512)) +# define FIXMAP_PMD_NUM (KM_PMDS + 2) +#endif /* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */ #define FIXMAP_PMD_TOP 507 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/kernel.h> -#include <asm/acpi.h> #include <asm/apicdef.h> #include <asm/page.h> +#include <asm/pgtable_types.h> #ifdef CONFIG_X86_32 #include <linux/threads.h> -#include <asm/kmap_types.h> #else #include <uapi/asm/vsyscall.h> #endif @@ -42,8 +48,7 @@ * Because of this, FIXADDR_TOP x86 integration was left as later work. */ #ifdef CONFIG_X86_32 -/* used by vmalloc.c, vsyscall.lds.S. - * +/* * Leave one empty page between vmalloc'ed areas and * the start of the fixmap. */ @@ -93,21 +98,16 @@ enum fixed_addresses { FIX_IO_APIC_BASE_0, FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, #endif -#ifdef CONFIG_X86_32 +#ifdef CONFIG_KMAP_LOCAL FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ - FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1, #ifdef CONFIG_PCI_MMCONFIG FIX_PCIE_MCFG, #endif #endif -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL FIX_PARAVIRT_BOOTMAP, #endif - FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */ - FIX_TEXT_POKE0, /* first page is last, because allocation is backward */ -#ifdef CONFIG_X86_INTEL_MID - FIX_LNW_VRTC, -#endif #ifdef CONFIG_ACPI_APEI_GHES /* Used for GHES mapping from assorted contexts */ @@ -122,7 +122,7 @@ enum fixed_addresses { * before ioremap() is functional. * * If necessary we round it up to the next 512 pages boundary so - * that we can have a single pgd entry and a single pte table: + * that we can have a single pmd entry and a single pte table: */ #define NR_FIX_BTMAPS 64 #define FIX_BTMAPS_SLOTS 8 @@ -154,12 +154,10 @@ extern void reserve_top_address(unsigned long reserve); extern int fixmaps_set; -extern pte_t *kmap_pte; -#define kmap_prot PAGE_KERNEL extern pte_t *pkmap_page_table; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); -void native_set_fixmap(enum fixed_addresses idx, +void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags); #ifndef CONFIG_PARAVIRT_XXL @@ -198,5 +196,5 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr, void __early_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_FIXMAP_H */ diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h index 7ec59edde154..e7a244051c62 100644 --- a/arch/x86/include/asm/floppy.h +++ b/arch/x86/include/asm/floppy.h @@ -10,6 +10,7 @@ #ifndef _ASM_X86_FLOPPY_H #define _ASM_X86_FLOPPY_H +#include <linux/sizes.h> #include <linux/vmalloc.h> /* @@ -22,17 +23,14 @@ */ #define _CROSS_64KB(a, s, vdma) \ (!(vdma) && \ - ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64)) - -#define CROSS_64KB(a, s) _CROSS_64KB(a, s, use_virtual_dma & 1) - + ((unsigned long)(a) / SZ_64K != ((unsigned long)(a) + (s) - 1) / SZ_64K)) #define SW fd_routine[use_virtual_dma & 1] #define CSW fd_routine[can_use_virtual_dma & 1] -#define fd_inb(port) inb_p(port) -#define fd_outb(value, port) outb_p(value, port) +#define fd_inb(base, reg) inb_p((base) + (reg)) +#define fd_outb(value, base, reg) outb_p(value, (base) + (reg)) #define fd_request_dma() 
CSW._request_dma(FLOPPY_DMA, "floppy") #define fd_free_dma() CSW._free_dma(FLOPPY_DMA) @@ -74,28 +72,28 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id) int lcount; char *lptr; - st = 1; for (lcount = virtual_dma_count, lptr = virtual_dma_addr; lcount; lcount--, lptr++) { - st = inb(virtual_dma_port + 4) & 0xa0; - if (st != 0xa0) + st = inb(virtual_dma_port + FD_STATUS); + st &= STATUS_DMA | STATUS_READY; + if (st != (STATUS_DMA | STATUS_READY)) break; if (virtual_dma_mode) - outb_p(*lptr, virtual_dma_port + 5); + outb_p(*lptr, virtual_dma_port + FD_DATA); else - *lptr = inb_p(virtual_dma_port + 5); + *lptr = inb_p(virtual_dma_port + FD_DATA); } virtual_dma_count = lcount; virtual_dma_addr = lptr; - st = inb(virtual_dma_port + 4); + st = inb(virtual_dma_port + FD_STATUS); } #ifdef TRACE_FLPY_INT calls++; #endif - if (st == 0x20) + if (st == STATUS_DMA) return IRQ_HANDLED; - if (!(st & 0x20)) { + if (!(st & STATUS_DMA)) { virtual_dma_residue += virtual_dma_count; virtual_dma_count = 0; #ifdef TRACE_FLPY_INT @@ -206,7 +204,7 @@ static int vdma_dma_setup(char *addr, unsigned long size, int mode, int io) static int hard_dma_setup(char *addr, unsigned long size, int mode, int io) { #ifdef FLOPPY_SANITY_CHECK - if (CROSS_64KB(addr, size)) { + if (_CROSS_64KB(addr, size, use_virtual_dma & 1)) { printk("DMA crossing 64-K boundary %p-%p\n", addr, addr+size); return -1; } diff --git a/arch/x86/include/asm/fpu.h b/arch/x86/include/asm/fpu.h new file mode 100644 index 000000000000..b2743fe19339 --- /dev/null +++ b/arch/x86/include/asm/fpu.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 SiFive + */ + +#ifndef _ASM_X86_FPU_H +#define _ASM_X86_FPU_H + +#include <asm/fpu/api.h> + +#define kernel_fpu_available() true + +#endif /* ! _ASM_X86_FPU_H */ diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index b56d504af654..cd6f194a912b 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -10,17 +10,94 @@ #ifndef _ASM_X86_FPU_API_H #define _ASM_X86_FPU_API_H +#include <linux/bottom_half.h> + +#include <asm/fpu/types.h> /* * Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It - * disables preemption so be careful if you intend to use it for long periods - * of time. - * If you intend to use the FPU in softirq you need to check first with - * irq_fpu_usable() if it is possible. + * disables preemption and softirq processing, so be careful if you intend to + * use it for long periods of time. Kernel-mode FPU cannot be used in all + * contexts -- see irq_fpu_usable() for details. */ -extern void kernel_fpu_begin(void); + +/* Kernel FPU states to initialize in kernel_fpu_begin_mask() */ +#define KFPU_387 _BITUL(0) /* 387 state will be initialized */ +#define KFPU_MXCSR _BITUL(1) /* MXCSR will be initialized */ + +extern void kernel_fpu_begin_mask(unsigned int kfpu_mask); extern void kernel_fpu_end(void); extern bool irq_fpu_usable(void); +extern void fpregs_mark_activate(void); + +/* Code that is unaware of kernel_fpu_begin_mask() can use this */ +static inline void kernel_fpu_begin(void) +{ +#ifdef CONFIG_X86_64 + /* + * Any 64-bit code that uses 387 instructions must explicitly request + * KFPU_387. + */ + kernel_fpu_begin_mask(KFPU_MXCSR); +#else + /* + * 32-bit kernel code may use 387 operations as well as SSE2, etc, + * as long as it checks that the CPU has the required capability. 
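+	 *
+	 * A minimal usage sketch (illustrative; do_sse2_work() is a
+	 * made-up helper):
+	 *
+	 *	if (boot_cpu_has(X86_FEATURE_XMM2)) {
+	 *		kernel_fpu_begin();
+	 *		do_sse2_work();
+	 *		kernel_fpu_end();
+	 *	}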
+ */ + kernel_fpu_begin_mask(KFPU_387 | KFPU_MXCSR); +#endif +} + +/* + * Use fpregs_lock() while editing CPU's FPU registers or fpu->fpstate, or while + * using the FPU in kernel mode. A context switch will (and softirq might) save + * CPU's FPU registers to fpu->fpstate.regs and set TIF_NEED_FPU_LOAD leaving + * CPU's FPU registers in a random state. + * + * local_bh_disable() protects against both preemption and soft interrupts + * on !RT kernels. + * + * On RT kernels local_bh_disable() is not sufficient because it only + * serializes soft interrupt related sections via a local lock, but stays + * preemptible. Disabling preemption is the right choice here as bottom + * half processing is always in thread context on RT kernels so it + * implicitly prevents bottom half processing as well. + */ +static inline void fpregs_lock(void) +{ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_disable(); + else + preempt_disable(); +} + +static inline void fpregs_unlock(void) +{ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_enable(); + else + preempt_enable(); +} + +/* + * FPU state gets lazily restored before returning to userspace. So when in the + * kernel, the valid FPU state may be kept in the buffer. This function will force + * restore all the fpu state to the registers early if needed, and lock them from + * being automatically saved/restored. Then FPU state can be modified safely in the + * registers, before unlocking with fpregs_unlock(). + */ +void fpregs_lock_and_load(void); + +#ifdef CONFIG_X86_DEBUG_FPU +extern void fpregs_assert_state_consistent(void); +#else +static inline void fpregs_assert_state_consistent(void) { } +#endif + +/* + * Load the task FPU state before returning to userspace. + */ +extern void switch_fpu_return(void); /* * Query the presence of one or more xfeatures. Works on any legacy CPU as well. 
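 * (For example, cpu_has_xfeatures(XFEATURE_MASK_FPSSE, NULL) -- an
 * illustrative call -- checks that FP and SSE state are both supported.)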
@@ -31,4 +108,73 @@ extern bool irq_fpu_usable(void); */ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name); +/* Trap handling */ +extern int fpu__exception_code(struct fpu *fpu, int trap_nr); +extern void fpu_sync_fpstate(struct fpu *fpu); +extern void fpu_reset_from_exception_fixup(void); + +/* Boot, hotplug and resume */ +extern void fpu__init_cpu(void); +extern void fpu__init_system(void); +extern void fpu__init_check_bugs(void); +extern void fpu__resume_cpu(void); + +#ifdef CONFIG_MATH_EMULATION +extern void fpstate_init_soft(struct swregs_state *soft); +#else +static inline void fpstate_init_soft(struct swregs_state *soft) {} +#endif + +/* State tracking */ +DECLARE_PER_CPU(bool, kernel_fpu_allowed); +DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); + +/* Process cleanup */ +#ifdef CONFIG_X86_64 +extern void fpstate_free(struct fpu *fpu); +#else +static inline void fpstate_free(struct fpu *fpu) { } +#endif + +/* fpstate-related functions which are exported to KVM */ +extern void fpstate_clear_xstate_component(struct fpstate *fpstate, unsigned int xfeature); + +extern u64 xstate_get_guest_group_perm(void); + +extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); + + +/* KVM specific functions */ +extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu); +extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu); +extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest); +extern int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures); + +#ifdef CONFIG_X86_64 +extern void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd); +extern void fpu_sync_guest_vmexit_xfd_state(void); +#else +static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { } +static inline void fpu_sync_guest_vmexit_xfd_state(void) { } +#endif + +extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, + unsigned int size, u64 xfeatures, u32 pkru); +extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru); + +static inline void fpstate_set_confidential(struct fpu_guest *gfpu) +{ + gfpu->fpstate->is_confidential = true; +} + +static inline bool fpstate_is_confidential(struct fpu_guest *gfpu) +{ + return gfpu->fpstate->is_confidential; +} + +/* prctl */ +extern long fpu_xstate_prctl(int option, unsigned long arg2); + +extern void fpu_idle_fpregs(void); + #endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h deleted file mode 100644 index fa2c93cb42a2..000000000000 --- a/arch/x86/include/asm/fpu/internal.h +++ /dev/null @@ -1,610 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes <gareth@valinux.com>, May 2000 - * x86-64 work by Andi Kleen 2002 - */ - -#ifndef _ASM_X86_FPU_INTERNAL_H -#define _ASM_X86_FPU_INTERNAL_H - -#include <linux/compat.h> -#include <linux/sched.h> -#include <linux/slab.h> - -#include <asm/user.h> -#include <asm/fpu/api.h> -#include <asm/fpu/xstate.h> -#include <asm/cpufeature.h> -#include <asm/trace/fpu.h> - -/* - * High level FPU state handling functions: - */ -extern void fpu__initialize(struct fpu *fpu); -extern void fpu__prepare_read(struct fpu *fpu); -extern void fpu__prepare_write(struct fpu *fpu); -extern void fpu__save(struct fpu *fpu); -extern void fpu__restore(struct fpu *fpu); -extern int 
fpu__restore_sig(void __user *buf, int ia32_frame); -extern void fpu__drop(struct fpu *fpu); -extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu); -extern void fpu__clear(struct fpu *fpu); -extern int fpu__exception_code(struct fpu *fpu, int trap_nr); -extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate); - -/* - * Boot time FPU initialization functions: - */ -extern void fpu__init_cpu(void); -extern void fpu__init_system_xstate(void); -extern void fpu__init_cpu_xstate(void); -extern void fpu__init_system(struct cpuinfo_x86 *c); -extern void fpu__init_check_bugs(void); -extern void fpu__resume_cpu(void); -extern u64 fpu__get_supported_xfeatures_mask(void); - -/* - * Debugging facility: - */ -#ifdef CONFIG_X86_DEBUG_FPU -# define WARN_ON_FPU(x) WARN_ON_ONCE(x) -#else -# define WARN_ON_FPU(x) ({ (void)(x); 0; }) -#endif - -/* - * FPU related CPU feature flag helper routines: - */ -static __always_inline __pure bool use_xsaveopt(void) -{ - return static_cpu_has(X86_FEATURE_XSAVEOPT); -} - -static __always_inline __pure bool use_xsave(void) -{ - return static_cpu_has(X86_FEATURE_XSAVE); -} - -static __always_inline __pure bool use_fxsr(void) -{ - return static_cpu_has(X86_FEATURE_FXSR); -} - -/* - * fpstate handling functions: - */ - -extern union fpregs_state init_fpstate; - -extern void fpstate_init(union fpregs_state *state); -#ifdef CONFIG_MATH_EMULATION -extern void fpstate_init_soft(struct swregs_state *soft); -#else -static inline void fpstate_init_soft(struct swregs_state *soft) {} -#endif - -static inline void fpstate_init_xstate(struct xregs_state *xsave) -{ - /* - * XRSTORS requires these bits set in xcomp_bv, or it will - * trigger #GP: - */ - xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask; -} - -static inline void fpstate_init_fxstate(struct fxregs_state *fx) -{ - fx->cwd = 0x37f; - fx->mxcsr = MXCSR_DEFAULT; -} -extern void fpstate_sanitize_xstate(struct fpu *fpu); - -#define user_insn(insn, output, input...) \ -({ \ - int err; \ - \ - might_fault(); \ - \ - asm volatile(ASM_STAC "\n" \ - "1:" #insn "\n\t" \ - "2: " ASM_CLAC "\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err), output \ - : "0"(0), input); \ - err; \ -}) - -#define kernel_insn(insn, output, input...) \ - asm volatile("1:" #insn "\n\t" \ - "2:\n" \ - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \ - : output : input) - -static inline int copy_fregs_to_user(struct fregs_state __user *fx) -{ - return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); -} - -static inline int copy_fxregs_to_user(struct fxregs_state __user *fx) -{ - if (IS_ENABLED(CONFIG_X86_32)) - return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); - else if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) - return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); - - /* See comment in copy_fxregs_to_kernel() below. */ - return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx)); -} - -static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) -{ - if (IS_ENABLED(CONFIG_X86_32)) { - kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - } else { - if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) { - kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); - } else { - /* See comment in copy_fxregs_to_kernel() below. 
*/ - kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx)); - } - } -} - -static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) -{ - if (IS_ENABLED(CONFIG_X86_32)) - return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - else if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) - return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); - - /* See comment in copy_fxregs_to_kernel() below. */ - return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), - "m" (*fx)); -} - -static inline void copy_kernel_to_fregs(struct fregs_state *fx) -{ - kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline int copy_user_to_fregs(struct fregs_state __user *fx) -{ - return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline void copy_fxregs_to_kernel(struct fpu *fpu) -{ - if (IS_ENABLED(CONFIG_X86_32)) - asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave)); - else if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) - asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); - else { - /* Using "rex64; fxsave %0" is broken because, if the memory - * operand uses any extended registers for addressing, a second - * REX prefix will be generated (to the assembler, rex64 - * followed by semicolon is a separate instruction), and hence - * the 64-bitness is lost. - * - * Using "fxsaveq %0" would be the ideal choice, but is only - * supported starting with gas 2.16. - * - * Using, as a workaround, the properly prefixed form below - * isn't accepted by any binutils version so far released, - * complaining that the same type of prefix is used twice if - * an extended register is needed for addressing (fix submitted - * to mainline 2005-11-21). - * - * asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave)); - * - * This, however, we can work around by forcing the compiler to - * select an addressing mode that doesn't require extended - * registers. - */ - asm volatile( "rex64/fxsave (%[fx])" - : "=m" (fpu->state.fxsave) - : [fx] "R" (&fpu->state.fxsave)); - } -} - -/* These macros all use (%edi)/(%rdi) as the single memory argument. */ -#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" -#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" -#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" -#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" -#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" - -#define XSTATE_OP(op, st, lmask, hmask, err) \ - asm volatile("1:" op "\n\t" \ - "xor %[err], %[err]\n" \ - "2:\n\t" \ - ".pushsection .fixup,\"ax\"\n\t" \ - "3: movl $-2,%[err]\n\t" \ - "jmp 2b\n\t" \ - ".popsection\n\t" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err) \ - : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ - : "memory") - -/* - * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact - * format and supervisor states in addition to modified optimization in - * XSAVEOPT. - * - * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT - * supports modified optimization which is not supported by XSAVE. - * - * We use XSAVE as a fallback. - * - * The 661 label is defined in the ALTERNATIVE* macros as the address of the - * original instruction which gets replaced. We need to use it here as the - * address of the instruction where we might get an exception at. 
- */ -#define XSTATE_XSAVE(st, lmask, hmask, err) \ - asm volatile(ALTERNATIVE_2(XSAVE, \ - XSAVEOPT, X86_FEATURE_XSAVEOPT, \ - XSAVES, X86_FEATURE_XSAVES) \ - "\n" \ - "xor %[err], %[err]\n" \ - "3:\n" \ - ".pushsection .fixup,\"ax\"\n" \ - "4: movl $-2, %[err]\n" \ - "jmp 3b\n" \ - ".popsection\n" \ - _ASM_EXTABLE(661b, 4b) \ - : [err] "=r" (err) \ - : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ - : "memory") - -/* - * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact - * XSAVE area format. - */ -#define XSTATE_XRESTORE(st, lmask, hmask) \ - asm volatile(ALTERNATIVE(XRSTOR, \ - XRSTORS, X86_FEATURE_XSAVES) \ - "\n" \ - "3:\n" \ - _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\ - : \ - : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ - : "memory") - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. - */ -static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) -{ - u64 mask = -1; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (static_cpu_has(X86_FEATURE_XSAVES)) - XSTATE_OP(XSAVES, xstate, lmask, hmask, err); - else - XSTATE_OP(XSAVE, xstate, lmask, hmask, err); - - /* We should never fault when copying to a kernel buffer: */ - WARN_ON_FPU(err); -} - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. - */ -static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) -{ - u64 mask = -1; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (static_cpu_has(X86_FEATURE_XSAVES)) - XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); - else - XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); - - /* - * We should never fault when copying from a kernel buffer, and the FPU - * state we set at boot time should be valid. - */ - WARN_ON_FPU(err); -} - -/* - * Save processor xstate to xsave area. - */ -static inline void copy_xregs_to_kernel(struct xregs_state *xstate) -{ - u64 mask = -1; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - WARN_ON_FPU(!alternatives_patched); - - XSTATE_XSAVE(xstate, lmask, hmask, err); - - /* We should never fault when copying to a kernel buffer: */ - WARN_ON_FPU(err); -} - -/* - * Restore processor xstate from xsave area. - */ -static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; - - XSTATE_XRESTORE(xstate, lmask, hmask); -} - -/* - * Save xstate to user space xsave area. - * - * We don't use modified optimization because xrstor/xrstors might track - * a different application. - * - * We don't use compacted format xsave area for - * backward compatibility for old applications which don't understand - * compacted format of xsave area. - */ -static inline int copy_xregs_to_user(struct xregs_state __user *buf) -{ - int err; - - /* - * Clear the xsave header first, so that reserved fields are - * initialized to zero. - */ - err = __clear_user(&buf->header, sizeof(buf->header)); - if (unlikely(err)) - return -EFAULT; - - stac(); - XSTATE_OP(XSAVE, buf, -1, -1, err); - clac(); - - return err; -} - -/* - * Restore xstate from user space xsave area. 
- */ -static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) -{ - struct xregs_state *xstate = ((__force struct xregs_state *)buf); - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - stac(); - XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); - clac(); - - return err; -} - -/* - * These must be called with preempt disabled. Returns - * 'true' if the FPU state is still intact and we can - * keep registers active. - * - * The legacy FNSAVE instruction cleared all FPU state - * unconditionally, so registers are essentially destroyed. - * Modern FPU state can be kept in registers, if there are - * no pending FP exceptions. - */ -static inline int copy_fpregs_to_fpstate(struct fpu *fpu) -{ - if (likely(use_xsave())) { - copy_xregs_to_kernel(&fpu->state.xsave); - return 1; - } - - if (likely(use_fxsr())) { - copy_fxregs_to_kernel(fpu); - return 1; - } - - /* - * Legacy FPU register saving, FNSAVE always clears FPU registers, - * so we have to mark them inactive: - */ - asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave)); - - return 0; -} - -static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask) -{ - if (use_xsave()) { - copy_kernel_to_xregs(&fpstate->xsave, mask); - } else { - if (use_fxsr()) - copy_kernel_to_fxregs(&fpstate->fxsave); - else - copy_kernel_to_fregs(&fpstate->fsave); - } -} - -static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate) -{ - /* - * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is - * pending. Clear the x87 state here by setting it to fixed values. - * "m" is a random variable that should be in L1. - */ - if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) { - asm volatile( - "fnclex\n\t" - "emms\n\t" - "fildl %P[addr]" /* set F?P to defined value */ - : : [addr] "m" (fpstate)); - } - - __copy_kernel_to_fpregs(fpstate, -1); -} - -extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); - -/* - * FPU context switch related helper methods: - */ - -DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); - -/* - * The in-register FPU state for an FPU context on a CPU is assumed to be - * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx - * matches the FPU. - * - * If the FPU register state is valid, the kernel can skip restoring the - * FPU state from memory. - * - * Any code that clobbers the FPU registers or updates the in-memory - * FPU state for a task MUST let the rest of the kernel know that the - * FPU registers are no longer valid for this task. - * - * Either one of these invalidation functions is enough. Invalidate - * a resource you control: CPU if using the CPU for something else - * (with preemption disabled), FPU for the current task, or a task that - * is prevented from running by the current task. 
- */ -static inline void __cpu_invalidate_fpregs_state(void) -{ - __this_cpu_write(fpu_fpregs_owner_ctx, NULL); -} - -static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu) -{ - fpu->last_cpu = -1; -} - -static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu) -{ - return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; -} - -/* - * These generally need preemption protection to work, - * do try to avoid using these on their own: - */ -static inline void fpregs_deactivate(struct fpu *fpu) -{ - this_cpu_write(fpu_fpregs_owner_ctx, NULL); - trace_x86_fpu_regs_deactivated(fpu); -} - -static inline void fpregs_activate(struct fpu *fpu) -{ - this_cpu_write(fpu_fpregs_owner_ctx, fpu); - trace_x86_fpu_regs_activated(fpu); -} - -/* - * FPU state switching for scheduling. - * - * This is a two-stage process: - * - * - switch_fpu_prepare() saves the old state. - * This is done within the context of the old process. - * - * - switch_fpu_finish() restores the new state as - * necessary. - */ -static inline void -switch_fpu_prepare(struct fpu *old_fpu, int cpu) -{ - if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) { - if (!copy_fpregs_to_fpstate(old_fpu)) - old_fpu->last_cpu = -1; - else - old_fpu->last_cpu = cpu; - - /* But leave fpu_fpregs_owner_ctx! */ - trace_x86_fpu_regs_deactivated(old_fpu); - } else - old_fpu->last_cpu = -1; -} - -/* - * Misc helper functions: - */ - -/* - * Set up the userspace FPU context for the new task, if the task - * has used the FPU. - */ -static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu) -{ - bool preload = static_cpu_has(X86_FEATURE_FPU) && - new_fpu->initialized; - - if (preload) { - if (!fpregs_state_valid(new_fpu, cpu)) - copy_kernel_to_fpregs(&new_fpu->state); - fpregs_activate(new_fpu); - } -} - -/* - * Needs to be preemption-safe. - * - * NOTE! user_fpu_begin() must be used only immediately before restoring - * the save state. It does not do any saving/restoring on its own. In - * lazy FPU mode, it is just an optimization to avoid a #NM exception, - * the task can lose the FPU right after preempt_enable(). 
- */
-static inline void user_fpu_begin(void)
-{
-	struct fpu *fpu = &current->thread.fpu;
-
-	preempt_disable();
-	fpregs_activate(fpu);
-	preempt_enable();
-}
-
-/*
- * MXCSR and XCR definitions:
- */
-
-extern unsigned int mxcsr_feature_mask;
-
-#define XCR_XFEATURE_ENABLED_MASK	0x00000000
-
-static inline u64 xgetbv(u32 index)
-{
-	u32 eax, edx;
-
-	asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
-		     : "=a" (eax), "=d" (edx)
-		     : "c" (index));
-	return eax + ((u64)edx << 32);
-}
-
-static inline void xsetbv(u32 index, u64 value)
-{
-	u32 eax = value;
-	u32 edx = value >> 32;
-
-	asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
-		     : : "a" (eax), "d" (edx), "c" (index));
-}
-
-#endif /* _ASM_X86_FPU_INTERNAL_H */
diff --git a/arch/x86/include/asm/fpu/regset.h b/arch/x86/include/asm/fpu/regset.h
index d5bdffb9d27f..697b77e96025 100644
--- a/arch/x86/include/asm/fpu/regset.h
+++ b/arch/x86/include/asm/fpu/regset.h
@@ -7,11 +7,12 @@
 
 #include <linux/regset.h>
 
-extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active;
-extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
-				xstateregs_get;
+extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active,
+				ssp_active;
+extern user_regset_get2_fn fpregs_get, xfpregs_get, fpregs_soft_get,
+				xstateregs_get, ssp_get;
 extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
-				xstateregs_set;
+				xstateregs_set, ssp_set;
 
 /*
  * xstateregs_active == regset_fpregs_active. Please refer to the comment
diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h
new file mode 100644
index 000000000000..89004f4ca208
--- /dev/null
+++ b/arch/x86/include/asm/fpu/sched.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_FPU_SCHED_H
+#define _ASM_X86_FPU_SCHED_H
+
+#include <linux/sched.h>
+
+#include <asm/cpufeature.h>
+#include <asm/fpu/types.h>
+
+#include <asm/trace/fpu.h>
+
+extern void save_fpregs_to_fpstate(struct fpu *fpu);
+extern void fpu__drop(struct task_struct *tsk);
+extern int  fpu_clone(struct task_struct *dst, u64 clone_flags, bool minimal,
+		      unsigned long shstk_addr);
+extern void fpu_flush_thread(void);
+
+/*
+ * FPU state switching for scheduling.
+ *
+ * switch_fpu() saves the old state and sets TIF_NEED_FPU_LOAD if
+ * TIF_NEED_FPU_LOAD is not set.  This is done within the context
+ * of the old process.
+ *
+ * Once TIF_NEED_FPU_LOAD is set, the registers must be reloaded
+ * before returning to userland and before the saved content is
+ * used in any other way.
+ *
+ * The FPU context is only stored/restored for a user task and
+ * PF_KTHREAD is used to distinguish between kernel and user threads.
+ */
+static inline void switch_fpu(struct task_struct *old, int cpu)
+{
+	if (!test_tsk_thread_flag(old, TIF_NEED_FPU_LOAD) &&
+	    cpu_feature_enabled(X86_FEATURE_FPU) &&
+	    !(old->flags & (PF_KTHREAD | PF_USER_WORKER))) {
+		struct fpu *old_fpu = x86_task_fpu(old);
+
+		set_tsk_thread_flag(old, TIF_NEED_FPU_LOAD);
+		save_fpregs_to_fpstate(old_fpu);
+		/*
+		 * The save operation preserved register state, so the
+		 * fpu_fpregs_owner_ctx is still @old_fpu.  Store the
+		 * current CPU number in @old_fpu, so the next return
+		 * to user space can avoid the FPU register restore
+		 * when it returns on the same CPU and still owns the
+		 * context.  See fpregs_restore_userregs().
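+		 *
+		 * (Expected call site, an assumption rather than part of
+		 * this hunk: __switch_to() invokes switch_fpu(prev_p, cpu)
+		 * for the outgoing task.)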
+ */ + old_fpu->last_cpu = cpu; + + trace_x86_fpu_regs_deactivated(old_fpu); + } +} + +#endif /* _ASM_X86_FPU_SCHED_H */ diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h index 44bbc39a57b3..eccc75bc9c4f 100644 --- a/arch/x86/include/asm/fpu/signal.h +++ b/arch/x86/include/asm/fpu/signal.h @@ -5,30 +5,33 @@ #ifndef _ASM_X86_FPU_SIGNAL_H #define _ASM_X86_FPU_SIGNAL_H +#include <linux/compat.h> +#include <linux/user.h> + +#include <asm/fpu/types.h> + #ifdef CONFIG_X86_64 # include <uapi/asm/sigcontext.h> # include <asm/user32.h> -struct ksignal; -int ia32_setup_rt_frame(int sig, struct ksignal *ksig, - compat_sigset_t *set, struct pt_regs *regs); -int ia32_setup_frame(int sig, struct ksignal *ksig, - compat_sigset_t *set, struct pt_regs *regs); #else # define user_i387_ia32_struct user_i387_struct # define user32_fxsr_struct user_fxsr_struct -# define ia32_setup_frame __setup_frame -# define ia32_setup_rt_frame __setup_rt_frame #endif extern void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk); -extern void convert_to_fxsr(struct task_struct *tsk, +extern void convert_to_fxsr(struct fxregs_state *fxsave, const struct user_i387_ia32_struct *env); unsigned long fpu__alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, unsigned long *size); -extern void fpu__init_prepare_fx_sw_frame(void); +unsigned long fpu__get_fpstate_size(void); + +extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size, u32 pkru); +extern void fpu__clear_user_states(struct fpu *fpu); +extern bool fpu__restore_sig(void __user *buf, int ia32_frame); +extern void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask); #endif /* _ASM_X86_FPU_SIGNAL_H */ diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 202c53918ecf..93e99d2583d6 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -2,8 +2,10 @@ /* * FPU data structures: */ -#ifndef _ASM_X86_FPU_H -#define _ASM_X86_FPU_H +#ifndef _ASM_X86_FPU_TYPES_H +#define _ASM_X86_FPU_TYPES_H + +#include <asm/page_types.h> /* * The legacy x87 FPU state format, as saved by FSAVE and @@ -114,6 +116,16 @@ enum xfeature { XFEATURE_Hi16_ZMM, XFEATURE_PT_UNIMPLEMENTED_SO_FAR, XFEATURE_PKRU, + XFEATURE_PASID, + XFEATURE_CET_USER, + XFEATURE_CET_KERNEL, + XFEATURE_RSRVD_COMP_13, + XFEATURE_RSRVD_COMP_14, + XFEATURE_LBR, + XFEATURE_RSRVD_COMP_16, + XFEATURE_XTILE_CFG, + XFEATURE_XTILE_DATA, + XFEATURE_APX, XFEATURE_MAX, }; @@ -128,12 +140,26 @@ enum xfeature { #define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM) #define XFEATURE_MASK_PT (1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR) #define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU) +#define XFEATURE_MASK_PASID (1 << XFEATURE_PASID) +#define XFEATURE_MASK_CET_USER (1 << XFEATURE_CET_USER) +#define XFEATURE_MASK_CET_KERNEL (1 << XFEATURE_CET_KERNEL) +#define XFEATURE_MASK_LBR (1 << XFEATURE_LBR) +#define XFEATURE_MASK_XTILE_CFG (1 << XFEATURE_XTILE_CFG) +#define XFEATURE_MASK_XTILE_DATA (1 << XFEATURE_XTILE_DATA) +#define XFEATURE_MASK_APX (1 << XFEATURE_APX) #define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE) #define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \ | XFEATURE_MASK_ZMM_Hi256 \ | XFEATURE_MASK_Hi16_ZMM) +#ifdef CONFIG_X86_64 +# define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILE_DATA \ + | XFEATURE_MASK_XTILE_CFG) +#else +# define XFEATURE_MASK_XTILE (0) +#endif + #define FIRST_EXTENDED_XFEATURE XFEATURE_YMM struct reg_128_bit { @@ -145,6 +171,9 @@ 
struct reg_256_bit { struct reg_512_bit { u8 regbytes[512/8]; }; +struct reg_1024_byte { + u8 regbytes[1024]; +}; /* * State component 2: @@ -229,6 +258,78 @@ struct pkru_state { u32 pad; } __packed; +/* + * State component 11 is Control-flow Enforcement user states + */ +struct cet_user_state { + /* user control-flow settings */ + u64 user_cet; + /* user shadow stack pointer */ + u64 user_ssp; +}; + +/* + * State component 12 is Control-flow Enforcement supervisor states. + * This state includes SSP pointers for privilege levels 0 through 2. + */ +struct cet_supervisor_state { + u64 pl0_ssp; + u64 pl1_ssp; + u64 pl2_ssp; +} __packed; + +/* + * State component 15: Architectural LBR configuration state. + * The size of Arch LBR state depends on the number of LBRs (lbr_depth). + */ + +struct lbr_entry { + u64 from; + u64 to; + u64 info; +}; + +struct arch_lbr_state { + u64 lbr_ctl; + u64 lbr_depth; + u64 ler_from; + u64 ler_to; + u64 ler_info; + struct lbr_entry entries[]; +}; + +/* + * State component 17: 64-byte tile configuration register. + */ +struct xtile_cfg { + u64 tcfg[8]; +} __packed; + +/* + * State component 18: 1KB tile data register. + * Each register represents 16 64-byte rows of the matrix + * data. But the number of registers depends on the actual + * implementation. + */ +struct xtile_data { + struct reg_1024_byte tmm; +} __packed; + +/* + * State component 19: 8B extended general purpose register. + */ +struct apx_state { + u64 egpr[16]; +} __packed; + +/* + * State component 10 is supervisor state used for context-switching the + * PASID state. + */ +struct ia32_pasid_state { + u64 pasid; +} __packed; + struct xstate_header { u64 xfeatures; u64 xcomp_bv; @@ -253,7 +354,7 @@ struct xstate_header { struct xregs_state { struct fxregs_state i387; struct xstate_header header; - u8 extended_state_area[0]; + u8 extended_state_area[]; } __attribute__ ((packed, aligned (64))); /* @@ -273,6 +374,95 @@ union fpregs_state { u8 __padding[PAGE_SIZE]; }; +struct fpstate { + /* @kernel_size: The size of the kernel register image */ + unsigned int size; + + /* @user_size: The size in non-compacted UABI format */ + unsigned int user_size; + + /* @xfeatures: xfeatures for which the storage is sized */ + u64 xfeatures; + + /* @user_xfeatures: xfeatures valid in UABI buffers */ + u64 user_xfeatures; + + /* @xfd: xfeatures disabled to trap userspace use. */ + u64 xfd; + + /* @is_valloc: Indicator for dynamically allocated state */ + unsigned int is_valloc : 1; + + /* @is_guest: Indicator for guest state (KVM) */ + unsigned int is_guest : 1; + + /* + * @is_confidential: Indicator for KVM confidential mode. + * The FPU registers are restored by the + * vmentry firmware from encrypted guest + * memory. On vmexit the FPU registers are + * saved by firmware to encrypted guest memory + * and the registers are scrubbed before + * returning to the host. So there is no + * content which is worth saving and restoring. + * The fpstate has to be there so that + * preemption and softirq FPU usage works + * without special casing. + */ + unsigned int is_confidential : 1; + + /* @in_use: State is in use */ + unsigned int in_use : 1; + + /* @regs: The register state union for all supported formats */ + union fpregs_state regs; + + /* @regs is dynamically sized! Don't add anything after @regs! 
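+	 *
+	 * Because of this, a reallocated fpstate is sized from the
+	 * offset of @regs plus the required register image size. A
+	 * minimal allocation sketch, with @newsz as a hypothetical
+	 * precomputed image size (fpstate_realloc() does the real work):
+	 *
+	 *	newfps = vzalloc(offsetof(struct fpstate, regs) + newsz);
+	 *	newfps->size	  = newsz;
+	 *	newfps->is_valloc = 1;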
+	 */
+} __aligned(64);
+
+#define FPU_GUEST_PERM_LOCKED		BIT_ULL(63)
+
+struct fpu_state_perm {
+	/*
+	 * @__state_perm:
+	 *
+	 * This bitmap indicates the permission for state components
+	 * available to a thread group, including both user and supervisor
+	 * components and software-defined bits like FPU_GUEST_PERM_LOCKED.
+	 * The permission prctl() sets the enabled state bits in
+	 * thread_group_leader()->thread.fpu.
+	 *
+	 * All run time operations use the per thread information in the
+	 * currently active fpu.fpstate which contains the xfeature masks
+	 * and sizes for kernel and user space.
+	 *
+	 * This master permission field is only to be used when
+	 * task.fpu.fpstate based checks fail to validate whether the task
+	 * is allowed to expand its xfeatures set which requires to
+	 * allocate a larger sized fpstate buffer.
+	 *
+	 * Do not access this field directly. Use the provided helper
+	 * function. Unlocked access is possible for quick checks.
+	 */
+	u64				__state_perm;
+
+	/*
+	 * @__state_size:
+	 *
+	 * The size required for @__state_perm. Only valid to access
+	 * with sighand locked.
+	 */
+	unsigned int			__state_size;
+
+	/*
+	 * @__user_state_size:
+	 *
+	 * The size required for @__state_perm user part. Only valid to
+	 * access with sighand locked.
+	 */
+	unsigned int			__user_state_size;
+};
 
 /*
  * Highest level per task FPU state data structure that
  * contains the FPU register state plus various FPU
@@ -294,28 +484,164 @@ struct fpu {
 	unsigned int			last_cpu;
 
 	/*
-	 * @initialized:
+	 * @avx512_timestamp:
 	 *
-	 * This flag indicates whether this context is initialized: if the task
-	 * is not running then we can restore from this context, if the task
-	 * is running then we should save into this context.
+	 * Records the timestamp of AVX512 use during last context switch.
 	 */
-	unsigned char			initialized;
+	unsigned long			avx512_timestamp;
 
 	/*
-	 * @state:
+	 * @fpstate:
 	 *
-	 * In-memory copy of all FPU registers that we save/restore
-	 * over context switches. If the task is using the FPU then
-	 * the registers in the FPU are more recent than this state
-	 * copy. If the task context-switches away then they get
-	 * saved here and represent the FPU state.
+	 * Pointer to the active struct fpstate. Initialized to
+	 * point at @__fpstate below.
 	 */
-	union fpregs_state		state;
+	struct fpstate			*fpstate;
+
 	/*
-	 * WARNING: 'state' is dynamically-sized. Do not put
+	 * @__task_fpstate:
+	 *
+	 * Pointer to an inactive struct fpstate. Initialized to NULL. Is
+	 * used only for KVM support to swap out the regular task fpstate.
+	 */
+	struct fpstate			*__task_fpstate;
+
+	/*
+	 * @perm:
+	 *
+	 * Permission related information
+	 */
+	struct fpu_state_perm		perm;
+
+	/*
+	 * @guest_perm:
+	 *
+	 * Permission related information for guest pseudo FPUs
+	 */
+	struct fpu_state_perm		guest_perm;
+
+	/*
+	 * @__fpstate:
+	 *
+	 * Initial in-memory storage for FPU registers which are saved in
+	 * context switch and when the kernel uses the FPU. The registers
+	 * are restored from this storage on return to user space if they
+	 * no longer contain the task's FPU register state.
+	 */
+	struct fpstate			__fpstate;
+	/*
+	 * WARNING: '__fpstate' is dynamically-sized. Do not put
 	 * anything after it here.
 	 */
 };
 
-#endif /* _ASM_X86_FPU_H */
+/*
+ * Guest pseudo FPU container
+ */
+struct fpu_guest {
+	/*
+	 * @xfeatures:			xfeature bitmap of features which are
+	 *				currently enabled for the guest vCPU.
+	 */
+	u64				xfeatures;
+
+	/*
+	 * @xfd_err:			Save the guest value.
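+	 *
+	 * Illustrative sketch (not the exact KVM code): when the guest
+	 * touches an XFD-armed feature, the resulting #NM exit leaves
+	 * the faulting feature bit in MSR_IA32_XFD_ERR, which is
+	 * preserved here across the exit, roughly:
+	 *
+	 *	rdmsrl(MSR_IA32_XFD_ERR, guest_fpu->xfd_err);
+	 *	if (guest_fpu->xfd_err)
+	 *		wrmsrl(MSR_IA32_XFD_ERR, 0);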
+ */ + u64 xfd_err; + + /* + * @uabi_size: Size required for save/restore + */ + unsigned int uabi_size; + + /* + * @fpstate: Pointer to the allocated guest fpstate + */ + struct fpstate *fpstate; +}; + +/* + * FPU state configuration data for fpu_guest. + * Initialized at boot time. Read only after init. + */ +struct vcpu_fpu_config { + /* + * @size: + * + * The default size of the register state buffer in guest FPUs. + * Includes all supported features except independent managed + * features and features which have to be requested by user space + * before usage. + */ + unsigned int size; + + /* + * @features: + * + * The default supported features bitmap in guest FPUs. Does not + * include independent managed features and features which have to + * be requested by user space before usage. + */ + u64 features; +}; + +/* + * FPU state configuration data. Initialized at boot time. Read only after init. + */ +struct fpu_state_config { + /* + * @max_size: + * + * The maximum size of the register state buffer. Includes all + * supported features except independent managed features. + */ + unsigned int max_size; + + /* + * @default_size: + * + * The default size of the register state buffer. Includes all + * supported features except independent managed features, + * guest-only features and features which have to be requested by + * user space before usage. + */ + unsigned int default_size; + + /* + * @max_features: + * + * The maximum supported features bitmap. Does not include + * independent managed features. + */ + u64 max_features; + + /* + * @default_features: + * + * The default supported features bitmap. Does not include + * independent managed features, guest-only features and features + * which have to be requested by user space before usage. + */ + u64 default_features; + /* + * @legacy_features: + * + * Features which can be reported back to user space + * even without XSAVE support, i.e. legacy features FP + SSE + */ + u64 legacy_features; + /* + * @independent_features: + * + * Features that are supported by XSAVES, but not managed as part of + * the FPU core, such as LBR + */ + u64 independent_features; +}; + +/* FPU state configuration information */ +extern struct fpu_state_config fpu_kernel_cfg, fpu_user_cfg; +extern struct vcpu_fpu_config guest_default_cfg; + +#endif /* _ASM_X86_FPU_TYPES_H */ diff --git a/arch/x86/include/asm/fpu/xcr.h b/arch/x86/include/asm/fpu/xcr.h new file mode 100644 index 000000000000..9a710c060445 --- /dev/null +++ b/arch/x86/include/asm/fpu/xcr.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_FPU_XCR_H +#define _ASM_X86_FPU_XCR_H + +#define XCR_XFEATURE_ENABLED_MASK 0x00000000 +#define XCR_XFEATURE_IN_USE_MASK 0x00000001 + +static __always_inline u64 xgetbv(u32 index) +{ + u32 eax, edx; + + asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index)); + return eax + ((u64)edx << 32); +} + +static inline void xsetbv(u32 index, u64 value) +{ + u32 eax = value; + u32 edx = value >> 32; + + asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index)); +} + +/* + * Return a mask of xfeatures which are currently being tracked + * by the processor as being in the initial configuration. + * + * Callers should check X86_FEATURE_XGETBV1. 
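+ *
+ * A hypothetical caller (amx_in_use being an illustrative variable)
+ * would gate on the feature bit before trusting the result:
+ *
+ *	if (cpu_feature_enabled(X86_FEATURE_XGETBV1))
+ *		amx_in_use = xfeatures_in_use() & XFEATURE_MASK_XTILE_DATA;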
+ */ +static __always_inline u64 xfeatures_in_use(void) +{ + return xgetbv(XCR_XFEATURE_IN_USE_MASK); +} + +#endif /* _ASM_X86_FPU_XCR_H */ diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 48581988d78c..7a7dc9d56027 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -2,15 +2,16 @@ #ifndef __ASM_X86_XSAVE_H #define __ASM_X86_XSAVE_H +#include <linux/uaccess.h> #include <linux/types.h> + #include <asm/processor.h> -#include <linux/uaccess.h> +#include <asm/fpu/api.h> +#include <asm/user.h> /* Bit 63 of XCR0 is reserved for future expansion */ #define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63))) -#define XSTATE_CPUID 0x0000000d - #define FXSAVE_SIZE 512 #define XSAVE_HDR_SIZE 64 @@ -19,42 +20,115 @@ #define XSAVE_YMM_SIZE 256 #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) -/* Supervisor features */ -#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT) +#define XSAVE_ALIGNMENT 64 -/* All currently supported features */ -#define XCNTXT_MASK (XFEATURE_MASK_FP | \ - XFEATURE_MASK_SSE | \ - XFEATURE_MASK_YMM | \ - XFEATURE_MASK_OPMASK | \ - XFEATURE_MASK_ZMM_Hi256 | \ - XFEATURE_MASK_Hi16_ZMM | \ - XFEATURE_MASK_PKRU | \ - XFEATURE_MASK_BNDREGS | \ - XFEATURE_MASK_BNDCSR) +/* All currently supported user features */ +#define XFEATURE_MASK_USER_SUPPORTED (XFEATURE_MASK_FP | \ + XFEATURE_MASK_SSE | \ + XFEATURE_MASK_YMM | \ + XFEATURE_MASK_OPMASK | \ + XFEATURE_MASK_ZMM_Hi256 | \ + XFEATURE_MASK_Hi16_ZMM | \ + XFEATURE_MASK_PKRU | \ + XFEATURE_MASK_BNDREGS | \ + XFEATURE_MASK_BNDCSR | \ + XFEATURE_MASK_XTILE | \ + XFEATURE_MASK_APX) -#ifdef CONFIG_X86_64 -#define REX_PREFIX "0x48, " -#else -#define REX_PREFIX -#endif +/* + * Features which are restored when returning to user space. + * PKRU is not restored on return to user space because PKRU + * is switched eagerly in switch_to() and flush_thread() + */ +#define XFEATURE_MASK_USER_RESTORE \ + (XFEATURE_MASK_USER_SUPPORTED & ~XFEATURE_MASK_PKRU) + +/* Features which are dynamically enabled for a process on request */ +#define XFEATURE_MASK_USER_DYNAMIC XFEATURE_MASK_XTILE_DATA + +/* Supervisor features which are enabled only in guest FPUs */ +#define XFEATURE_MASK_GUEST_SUPERVISOR XFEATURE_MASK_CET_KERNEL + +/* All currently supported supervisor features */ +#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID | \ + XFEATURE_MASK_CET_USER | \ + XFEATURE_MASK_GUEST_SUPERVISOR) + +/* + * A supervisor state component may not always contain valuable information, + * and its size may be huge. Saving/restoring such supervisor state components + * at each context switch can cause high CPU and space overhead, which should + * be avoided. Such supervisor state components should only be saved/restored + * on demand. The on-demand supervisor features are set in this mask. + * + * Unlike the existing supported supervisor features, an independent supervisor + * feature does not allocate a buffer in task->fpu, and the corresponding + * supervisor state component cannot be saved/restored at each context switch. + * + * To support an independent supervisor feature, a developer should follow the + * dos and don'ts as below: + * - Do dynamically allocate a buffer for the supervisor state component. + * - Do manually invoke the XSAVES/XRSTORS instruction to save/restore the + * state component to/from the buffer. + * - Don't set the bit corresponding to the independent supervisor feature in + * IA32_XSS at run time, since it has been set at boot time. 
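+ *
+ * Illustrative shape of such code (sketch only; the arch LBR support
+ * is the in-tree example, and it also takes care of the 64-byte
+ * alignment and XSTATE header that XSAVES/XRSTORS require):
+ *
+ *	save on sched-out:	xsaves(lbr_buf, XFEATURE_MASK_LBR);
+ *	restore on sched-in:	xrstors(lbr_buf, XFEATURE_MASK_LBR);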
+ */
+#define XFEATURE_MASK_INDEPENDENT	(XFEATURE_MASK_LBR)
+
+/*
+ * Unsupported supervisor features. When a supervisor feature in this mask is
+ * supported in the future, move it to the supported supervisor feature mask.
+ */
+#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED	(XFEATURE_MASK_PT)
+
+/* All supervisor states including supported and unsupported states. */
+#define XFEATURE_MASK_SUPERVISOR_ALL	(XFEATURE_MASK_SUPERVISOR_SUPPORTED | \
+					 XFEATURE_MASK_INDEPENDENT | \
+					 XFEATURE_MASK_SUPERVISOR_UNSUPPORTED)
+
+/*
+ * The feature mask required to restore FPU state:
+ * - All user states which are not eagerly switched in switch_to()/exec()
+ * - The supervisor states
+ */
+#define XFEATURE_MASK_FPSTATE	(XFEATURE_MASK_USER_RESTORE | \
+				 XFEATURE_MASK_SUPERVISOR_SUPPORTED)
+
+/*
+ * Features in this mask have space allocated in the signal frame, but may not
+ * have that space initialized when the feature is in its init state.
+ */
+#define XFEATURE_MASK_SIGFRAME_INITOPT	(XFEATURE_MASK_XTILE | \
+					 XFEATURE_MASK_USER_DYNAMIC)
 
-extern u64 xfeatures_mask;
 extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 
 extern void __init update_regset_xstate_info(unsigned int size,
 					     u64 xstate_mask);
 
-void fpu__xstate_clear_all_cpu_caps(void);
-void *get_xsave_addr(struct xregs_state *xsave, int xstate);
-const void *get_xsave_field_ptr(int xstate_field);
-int using_compacted_format(void);
-int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
-int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
-int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
-int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
-
-/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
-extern int validate_xstate_header(const struct xstate_header *hdr);
+int xfeature_size(int xfeature_nr);
+
+void xsaves(struct xregs_state *xsave, u64 mask);
+void xrstors(struct xregs_state *xsave, u64 mask);
+
+int xfd_enable_feature(u64 xfd_err);
+
+#ifdef CONFIG_X86_64
+DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic);
+
+static __always_inline __pure bool fpu_state_size_dynamic(void)
+{
+	return static_branch_unlikely(&__fpu_state_size_dynamic);
+}
+#else
+static __always_inline __pure bool fpu_state_size_dynamic(void)
+{
+	return false;
+}
+#endif
 
 #endif
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 5cbce6fbb534..0ab65073c1cc 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -11,7 +11,7 @@
 
 #ifdef CONFIG_FRAME_POINTER
 
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
 
 .macro FRAME_BEGIN
 	push %_ASM_BP
@@ -22,7 +22,36 @@
 	pop %_ASM_BP
 .endm
 
-#else /* !__ASSEMBLY__ */
+#ifdef CONFIG_X86_64
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
+ * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
+ * the original rbp.
+ */
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+	leaq 1+\ptregs_offset(%rsp), %rbp
+.endm
+#else /* !CONFIG_X86_64 */
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack.
The + * frame pointer is replaced with an encoded pointer to pt_regs. The encoding + * is just clearing the MSB, which makes it an invalid stack address and is also + * a signal to the unwinder that it's a pt_regs pointer in disguise. + * + * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the + * original ebp. + */ +.macro ENCODE_FRAME_POINTER + mov %esp, %ebp + andl $0x7fffffff, %ebp +.endm +#endif /* CONFIG_X86_64 */ + +#else /* !__ASSEMBLER__ */ #define FRAME_BEGIN \ "push %" _ASM_BP "\n" \ @@ -30,12 +59,51 @@ #define FRAME_END "pop %" _ASM_BP "\n" -#endif /* __ASSEMBLY__ */ +#ifdef CONFIG_X86_64 + +#define ENCODE_FRAME_POINTER \ + "lea 1(%rsp), %rbp\n\t" + +static inline unsigned long encode_frame_pointer(struct pt_regs *regs) +{ + return (unsigned long)regs + 1; +} + +#else /* !CONFIG_X86_64 */ + +#define ENCODE_FRAME_POINTER \ + "movl %esp, %ebp\n\t" \ + "andl $0x7fffffff, %ebp\n\t" + +static inline unsigned long encode_frame_pointer(struct pt_regs *regs) +{ + return (unsigned long)regs & 0x7fffffff; +} + +#endif /* CONFIG_X86_64 */ + +#endif /* __ASSEMBLER__ */ #define FRAME_OFFSET __ASM_SEL(4, 8) #else /* !CONFIG_FRAME_POINTER */ +#ifdef __ASSEMBLER__ + +.macro ENCODE_FRAME_POINTER ptregs_offset=0 +.endm + +#else /* !__ASSEMBLER__ */ + +#define ENCODE_FRAME_POINTER + +static inline unsigned long encode_frame_pointer(struct pt_regs *regs) +{ + return 0; +} + +#endif + #define FRAME_BEGIN #define FRAME_END #define FRAME_OFFSET 0 diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h new file mode 100644 index 000000000000..2bb65677c079 --- /dev/null +++ b/arch/x86/include/asm/fred.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Macros for Flexible Return and Event Delivery (FRED) + */ + +#ifndef ASM_X86_FRED_H +#define ASM_X86_FRED_H + +#include <linux/const.h> + +#include <asm/asm.h> +#include <asm/msr.h> +#include <asm/trapnr.h> + +/* + * FRED event return instruction opcodes for ERET{S,U}; supported in + * binutils >= 2.41. + */ +#define ERETS _ASM_BYTES(0xf2,0x0f,0x01,0xca) +#define ERETU _ASM_BYTES(0xf3,0x0f,0x01,0xca) + +/* + * RSP is aligned to a 64-byte boundary before used to push a new stack frame + */ +#define FRED_STACK_FRAME_RSP_MASK _AT(unsigned long, (~0x3f)) + +/* + * Used for the return address for call emulation during code patching, + * and measured in 64-byte cache lines. + */ +#define FRED_CONFIG_REDZONE_AMOUNT 1 +#define FRED_CONFIG_REDZONE (_AT(unsigned long, FRED_CONFIG_REDZONE_AMOUNT) << 6) +#define FRED_CONFIG_INT_STKLVL(l) (_AT(unsigned long, l) << 9) +#define FRED_CONFIG_ENTRYPOINT(p) _AT(unsigned long, (p)) + +#ifndef __ASSEMBLER__ + +#ifdef CONFIG_X86_FRED +#include <linux/kernel.h> +#include <linux/sched/task_stack.h> + +#include <asm/ptrace.h> + +struct fred_info { + /* Event data: CR2, DR6, ... 
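+	 *
+	 * e.g. a FRED page fault handler reads CR2 from here instead
+	 * of from the CR2 register (illustrative):
+	 *
+	 *	unsigned long address = fred_event_data(regs);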
*/ + unsigned long edata; + unsigned long resv; +}; + +/* Full format of the FRED stack frame */ +struct fred_frame { + struct pt_regs regs; + struct fred_info info; +}; + +static __always_inline struct fred_info *fred_info(struct pt_regs *regs) +{ + return &container_of(regs, struct fred_frame, regs)->info; +} + +static __always_inline unsigned long fred_event_data(struct pt_regs *regs) +{ + return fred_info(regs)->edata; +} + +void asm_fred_entrypoint_user(void); +void asm_fred_entrypoint_kernel(void); +void asm_fred_entry_from_kvm(struct fred_ss); + +__visible void fred_entry_from_user(struct pt_regs *regs); +__visible void fred_entry_from_kernel(struct pt_regs *regs); +__visible void __fred_entry_from_kvm(struct pt_regs *regs); + +/* Can be called from noinstr code, thus __always_inline */ +static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) +{ + struct fred_ss ss = { + .ss =__KERNEL_DS, + .type = type, + .vector = vector, + .nmi = type == EVENT_TYPE_NMI, + .l = 1, + }; + + asm_fred_entry_from_kvm(ss); +} + +void cpu_init_fred_exceptions(void); +void cpu_init_fred_rsps(void); +void fred_complete_exception_setup(void); + +DECLARE_PER_CPU(unsigned long, fred_rsp0); + +static __always_inline void fred_sync_rsp0(unsigned long rsp0) +{ + __this_cpu_write(fred_rsp0, rsp0); +} + +static __always_inline void fred_update_rsp0(void) +{ + unsigned long rsp0 = (unsigned long) task_stack_page(current) + THREAD_SIZE; + + if (cpu_feature_enabled(X86_FEATURE_FRED) && (__this_cpu_read(fred_rsp0) != rsp0)) { + wrmsrns(MSR_IA32_FRED_RSP0, rsp0); + __this_cpu_write(fred_rsp0, rsp0); + } +} +#else /* CONFIG_X86_FRED */ +static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { return 0; } +static inline void cpu_init_fred_exceptions(void) { } +static inline void cpu_init_fred_rsps(void) { } +static inline void fred_complete_exception_setup(void) { } +static inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { } +static inline void fred_sync_rsp0(unsigned long rsp0) { } +static inline void fred_update_rsp0(void) { } +#endif /* CONFIG_X86_FRED */ +#endif /* !__ASSEMBLER__ */ + +#endif /* ASM_X86_FRED_H */ diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h index bca4c743de77..ab2547f97c2c 100644 --- a/arch/x86/include/asm/fsgsbase.h +++ b/arch/x86/include/asm/fsgsbase.h @@ -2,11 +2,11 @@ #ifndef _ASM_FSGSBASE_H #define _ASM_FSGSBASE_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_X86_64 -#include <asm/msr-index.h> +#include <asm/msr.h> /* * Read/write a task's FSBASE or GSBASE. This returns the value that @@ -19,38 +19,67 @@ extern unsigned long x86_gsbase_read_task(struct task_struct *task); extern void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase); extern void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase); -/* Helper functions for reading/writing FS/GS base */ +/* Must be protected by X86_FEATURE_FSGSBASE check. 
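+ *
+ * i.e. a raw use looks like this (illustrative; the helpers below
+ * fall back to the MSRs when FSGSBASE is not available):
+ *
+ *	if (boot_cpu_has(X86_FEATURE_FSGSBASE))
+ *		fsbase = rdfsbase();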
*/ -static inline unsigned long x86_fsbase_read_cpu(void) +static __always_inline unsigned long rdfsbase(void) { unsigned long fsbase; - rdmsrl(MSR_FS_BASE, fsbase); + asm volatile("rdfsbase %0" : "=r" (fsbase) :: "memory"); return fsbase; } -static inline unsigned long x86_gsbase_read_cpu_inactive(void) +static __always_inline unsigned long rdgsbase(void) { unsigned long gsbase; - rdmsrl(MSR_KERNEL_GS_BASE, gsbase); + asm volatile("rdgsbase %0" : "=r" (gsbase) :: "memory"); return gsbase; } -static inline void x86_fsbase_write_cpu(unsigned long fsbase) +static __always_inline void wrfsbase(unsigned long fsbase) { - wrmsrl(MSR_FS_BASE, fsbase); + asm volatile("wrfsbase %0" :: "r" (fsbase) : "memory"); } -static inline void x86_gsbase_write_cpu_inactive(unsigned long gsbase) +static __always_inline void wrgsbase(unsigned long gsbase) { - wrmsrl(MSR_KERNEL_GS_BASE, gsbase); + asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory"); } +#include <asm/cpufeature.h> + +/* Helper functions for reading/writing FS/GS base */ + +static inline unsigned long x86_fsbase_read_cpu(void) +{ + unsigned long fsbase; + + if (boot_cpu_has(X86_FEATURE_FSGSBASE)) + fsbase = rdfsbase(); + else + rdmsrq(MSR_FS_BASE, fsbase); + + return fsbase; +} + +static inline void x86_fsbase_write_cpu(unsigned long fsbase) +{ + if (boot_cpu_has(X86_FEATURE_FSGSBASE)) + wrfsbase(fsbase); + else + wrmsrq(MSR_FS_BASE, fsbase); +} + +extern unsigned long x86_gsbase_read_cpu_inactive(void); +extern void x86_gsbase_write_cpu_inactive(unsigned long gsbase); +extern unsigned long x86_fsgsbase_read_task(struct task_struct *task, + unsigned short selector); + #endif /* CONFIG_X86_64 */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_FSGSBASE_H */ diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index cf350639e76d..b08c95872eed 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -2,24 +2,27 @@ #ifndef _ASM_X86_FTRACE_H #define _ASM_X86_FTRACE_H +#include <asm/ptrace.h> + #ifdef CONFIG_FUNCTION_TRACER -#ifdef CC_USING_FENTRY -# define MCOUNT_ADDR ((unsigned long)(__fentry__)) -#else -# define MCOUNT_ADDR ((unsigned long)(mcount)) -# define HAVE_FUNCTION_GRAPH_FP_TEST +#ifndef CC_USING_FENTRY +# error Compiler does not support fentry? 
#endif +# define MCOUNT_ADDR ((unsigned long)(__fentry__)) #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ +/* Ignore unused weak functions which will have non zero offsets */ +#ifdef CONFIG_HAVE_FENTRY +# include <asm/ibt.h> +/* Add offset for endbr64 if IBT enabled */ +# define FTRACE_MCOUNT_MAX_OFFSET ENDBR_INSN_SIZE +#endif + #ifdef CONFIG_DYNAMIC_FTRACE #define ARCH_SUPPORTS_FTRACE_OPS 1 #endif -#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR - -#ifndef __ASSEMBLY__ -extern void mcount(void); -extern atomic_t modifying_ftrace_code; +#ifndef __ASSEMBLER__ extern void __fentry__(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) @@ -31,33 +34,109 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } +static inline unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip) +{ + if (is_endbr((void*)(fentry_ip - ENDBR_INSN_SIZE))) + fentry_ip -= ENDBR_INSN_SIZE; + + return fentry_ip; +} +#define ftrace_get_symaddr(fentry_ip) arch_ftrace_get_symaddr(fentry_ip) + +#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS + +#include <linux/ftrace_regs.h> + +static __always_inline struct pt_regs * +arch_ftrace_get_regs(struct ftrace_regs *fregs) +{ + /* Only when FL_SAVE_REGS is set, cs will be non zero */ + if (!arch_ftrace_regs(fregs)->regs.cs) + return NULL; + return &arch_ftrace_regs(fregs)->regs; +} + +#define arch_ftrace_partial_regs(regs) do { \ + regs->flags &= ~X86_EFLAGS_FIXED; \ + regs->cs = __KERNEL_CS; \ +} while (0) + +#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \ + (_regs)->ip = arch_ftrace_regs(fregs)->regs.ip; \ + (_regs)->sp = arch_ftrace_regs(fregs)->regs.sp; \ + (_regs)->cs = __KERNEL_CS; \ + (_regs)->flags = 0; \ + } while (0) + +#define ftrace_regs_set_instruction_pointer(fregs, _ip) \ + do { arch_ftrace_regs(fregs)->regs.ip = (_ip); } while (0) + + +static __always_inline unsigned long +ftrace_regs_get_return_address(struct ftrace_regs *fregs) +{ + return *(unsigned long *)ftrace_regs_get_stack_pointer(fregs); +} + +struct ftrace_ops; +#define ftrace_graph_func ftrace_graph_func +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); +#else +#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +/* + * When a ftrace registered caller is tracing a function that is + * also set by a register_ftrace_direct() call, it needs to be + * differentiated in the ftrace_caller trampoline. To do this, we + * place the direct caller in the ORIG_AX part of pt_regs. This + * tells the ftrace_caller that there's a direct caller. 
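+ *
+ * Illustrative usage from a direct-call client, where my_ops and
+ * my_tramp are hypothetical (an ftrace_ops and an assembly
+ * trampoline respectively):
+ *
+ *	ftrace_set_filter_ip(&my_ops, (unsigned long)target, 0, 0);
+ *	register_ftrace_direct(&my_ops, (unsigned long)my_tramp);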
+ */ +static inline void +__arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) +{ + /* Emulate a call */ + regs->orig_ax = addr; +} +#define arch_ftrace_set_direct_caller(fregs, addr) \ + __arch_ftrace_set_direct_caller(&arch_ftrace_regs(fregs)->regs, addr) +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + #ifdef CONFIG_DYNAMIC_FTRACE struct dyn_arch_ftrace { /* No extra data needed for x86 */ }; -int ftrace_int3_handler(struct pt_regs *regs); - -#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR - #endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_FUNCTION_TRACER */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ + +void prepare_ftrace_return(unsigned long ip, unsigned long *parent, + unsigned long frame_pointer); + +#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) +extern void set_ftrace_ops_ro(void); +#else +static inline void set_ftrace_ops_ro(void) { } +#endif #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) { /* * Compare the symbol name with the system call name. Skip the - * "__x64_sys", "__ia32_sys" or simple "sys" prefix. + * "__x64_sys", "__ia32_sys", "__do_sys" or simple "sys" prefix. */ return !strcmp(sym + 3, name + 3) || (!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) || - (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3)); + (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3)) || + (!strncmp(sym, "__do_sys", 8) && !strcmp(sym + 8, name + 3)); } #ifndef COMPILE_OFFSETS @@ -80,6 +159,6 @@ static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) } #endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */ #endif /* !COMPILE_OFFSETS */ -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_FTRACE_H */ diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index 13c83fe97988..fe5d9a10d900 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -12,76 +12,86 @@ #include <asm/processor.h> #include <asm/smap.h> -#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ - asm volatile("\t" ASM_STAC "\n" \ - "1:\t" insn "\n" \ - "2:\t" ASM_CLAC "\n" \ - "\t.section .fixup,\"ax\"\n" \ - "3:\tmov\t%3, %1\n" \ - "\tjmp\t2b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ +#define unsafe_atomic_op1(insn, oval, uaddr, oparg, label) \ +do { \ + int oldval = 0, ret; \ + asm volatile("1:\t" insn "\n" \ + "2:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %1) \ : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ - : "i" (-EFAULT), "0" (oparg), "1" (0)) + : "0" (oparg), "1" (0)); \ + if (ret) \ + goto label; \ + *oval = oldval; \ +} while(0) -#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ - asm volatile("\t" ASM_STAC "\n" \ - "1:\tmovl %2, %0\n" \ - "\tmovl\t%0, %3\n" \ + +#define unsafe_atomic_op2(insn, oval, uaddr, oparg, label) \ +do { \ + int oldval = 0, ret, tem; \ + asm volatile("1:\tmovl %2, %0\n" \ + "2:\tmovl\t%0, %3\n" \ "\t" insn "\n" \ - "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ - "\tjnz\t1b\n" \ - "3:\t" ASM_CLAC "\n" \ - "\t.section .fixup,\"ax\"\n" \ - "4:\tmov\t%5, %1\n" \ - "\tjmp\t3b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 4b) \ - _ASM_EXTABLE_UA(2b, 4b) \ + "3:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ + "\tjnz\t2b\n" \ + "4:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 4b, EX_TYPE_EFAULT_REG, %1) \ + _ASM_EXTABLE_TYPE_REG(3b, 4b, EX_TYPE_EFAULT_REG, %1) \ : "=&a" (oldval), "=&r" 
(ret), \ "+m" (*uaddr), "=&r" (tem) \ - : "r" (oparg), "i" (-EFAULT), "1" (0)) + : "r" (oparg), "1" (0)); \ + if (ret) \ + goto label; \ + *oval = oldval; \ +} while(0) -static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, - u32 __user *uaddr) +static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, + u32 __user *uaddr) { - int oldval = 0, ret, tem; - - pagefault_disable(); - - switch (op) { - case FUTEX_OP_SET: - __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); - break; - case FUTEX_OP_ADD: - __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval, - uaddr, oparg); - break; - case FUTEX_OP_OR: - __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg); - break; - case FUTEX_OP_ANDN: - __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg); - break; - case FUTEX_OP_XOR: - __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg); - break; - default: - ret = -ENOSYS; + scoped_user_rw_access(uaddr, Efault) { + switch (op) { + case FUTEX_OP_SET: + unsafe_atomic_op1("xchgl %0, %2", oval, uaddr, oparg, Efault); + break; + case FUTEX_OP_ADD: + unsafe_atomic_op1(LOCK_PREFIX "xaddl %0, %2", oval, uaddr, oparg, Efault); + break; + case FUTEX_OP_OR: + unsafe_atomic_op2("orl %4, %3", oval, uaddr, oparg, Efault); + break; + case FUTEX_OP_ANDN: + unsafe_atomic_op2("andl %4, %3", oval, uaddr, ~oparg, Efault); + break; + case FUTEX_OP_XOR: + unsafe_atomic_op2("xorl %4, %3", oval, uaddr, oparg, Efault); + break; + default: + return -ENOSYS; + } } - - pagefault_enable(); - - if (!ret) - *oval = oldval; - - return ret; + return 0; +Efault: + return -EFAULT; } static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { - return user_atomic_cmpxchg_inatomic(uval, uaddr, oldval, newval); + int ret = 0; + + scoped_user_rw_access(uaddr, Efault) { + asm_inline volatile("\n" + "1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" + "2:\n" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %0) + : "+r" (ret), "=a" (oldval), "+m" (*uaddr) + : "r" (newval), "1" (oldval) + : "memory"); + *uval = oldval; + } + return ret; +Efault: + return -EFAULT; } #endif diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 318556574345..5af8088a10df 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h @@ -38,7 +38,7 @@ extern int gart_iommu_aperture_disabled; extern void early_gart_iommu_check(void); extern int gart_iommu_init(void); extern void __init gart_parse_options(char *); -extern int gart_iommu_hole_init(void); +void gart_iommu_hole_init(void); #else #define gart_iommu_aperture 0 @@ -51,9 +51,8 @@ static inline void early_gart_iommu_check(void) static inline void gart_parse_options(char *options) { } -static inline int gart_iommu_hole_init(void) +static inline void gart_iommu_hole_init(void) { - return -ENODEV; } #endif diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h index 7cd73552a4e8..3c7267ef4a9e 100644 --- a/arch/x86/include/asm/geode.h +++ b/arch/x86/include/asm/geode.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * AMD Geode definitions * Copyright (C) 2006, Advanced Micro Devices, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. 
*/ #ifndef _ASM_X86_GEODE_H diff --git a/arch/x86/include/asm/gsseg.h b/arch/x86/include/asm/gsseg.h new file mode 100644 index 000000000000..ab6a595cea70 --- /dev/null +++ b/arch/x86/include/asm/gsseg.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_X86_GSSEG_H +#define _ASM_X86_GSSEG_H + +#include <linux/types.h> + +#include <asm/asm.h> +#include <asm/cpufeature.h> +#include <asm/alternative.h> +#include <asm/processor.h> +#include <asm/nops.h> + +#ifdef CONFIG_X86_64 + +extern asmlinkage void asm_load_gs_index(u16 selector); + +/* Replace with "lkgs %di" once binutils support LKGS instruction */ +#define LKGS_DI _ASM_BYTES(0xf2,0x0f,0x00,0xf7) + +static inline void native_lkgs(unsigned int selector) +{ + u16 sel = selector; + asm_inline volatile("1: " LKGS_DI + _ASM_EXTABLE_TYPE_REG(1b, 1b, EX_TYPE_ZERO_REG, %k[sel]) + : [sel] "+D" (sel)); +} + +static inline void native_load_gs_index(unsigned int selector) +{ + if (cpu_feature_enabled(X86_FEATURE_LKGS)) { + native_lkgs(selector); + } else { + unsigned long flags; + + local_irq_save(flags); + asm_load_gs_index(selector); + local_irq_restore(flags); + } +} + +#endif /* CONFIG_X86_64 */ + +static inline void __init lkgs_init(void) +{ +#ifdef CONFIG_PARAVIRT_XXL +#ifdef CONFIG_X86_64 + if (cpu_feature_enabled(X86_FEATURE_LKGS)) + pv_ops.cpu.load_gs_index = native_lkgs; +#endif +#endif +} + +#ifndef CONFIG_PARAVIRT_XXL + +static inline void load_gs_index(unsigned int selector) +{ +#ifdef CONFIG_X86_64 + native_load_gs_index(selector); +#else + loadsegment(gs, selector); +#endif +} + +#endif /* CONFIG_PARAVIRT_XXL */ + +#endif /* _ASM_X86_GSSEG_H */ diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index d9069bb26c7f..6b6d472baa0b 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -5,8 +5,7 @@ #include <linux/threads.h> typedef struct { - u16 __softirq_pending; -#if IS_ENABLED(CONFIG_KVM_INTEL) +#if IS_ENABLED(CONFIG_CPU_MITIGATIONS) && IS_ENABLED(CONFIG_KVM_INTEL) u8 kvm_cpu_l1tf_flush_l1d; #endif unsigned int __nmi_count; /* arch dependent */ @@ -15,7 +14,7 @@ typedef struct { unsigned int irq_spurious_count; unsigned int icr_read_retry_count; #endif -#ifdef CONFIG_HAVE_KVM +#if IS_ENABLED(CONFIG_KVM) unsigned int kvm_posted_intr_ipis; unsigned int kvm_posted_intr_wakeup_ipis; unsigned int kvm_posted_intr_nested_ipis; @@ -37,17 +36,23 @@ typedef struct { #ifdef CONFIG_X86_MCE_AMD unsigned int irq_deferred_error_count; #endif -#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) +#ifdef CONFIG_X86_HV_CALLBACK_VECTOR unsigned int irq_hv_callback_count; #endif #if IS_ENABLED(CONFIG_HYPERV) unsigned int irq_hv_reenlightenment_count; unsigned int hyperv_stimer0_count; #endif +#ifdef CONFIG_X86_POSTED_MSI + unsigned int posted_msi_notification_count; +#endif } ____cacheline_aligned irq_cpustat_t; DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +#ifdef CONFIG_X86_POSTED_MSI +DECLARE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc); +#endif #define __ARCH_IRQ_STAT #define inc_irq_stat(member) this_cpu_inc(irq_stat.member) @@ -60,24 +65,30 @@ extern u64 arch_irq_stat_cpu(unsigned int cpu); extern u64 arch_irq_stat(void); #define arch_irq_stat arch_irq_stat +DECLARE_PER_CPU_CACHE_HOT(u16, __softirq_pending); +#define local_softirq_pending_ref __softirq_pending -#if IS_ENABLED(CONFIG_KVM_INTEL) -static inline void kvm_set_cpu_l1tf_flush_l1d(void) +#if IS_ENABLED(CONFIG_CPU_MITIGATIONS) && IS_ENABLED(CONFIG_KVM_INTEL) +/* + * This function is called 
from noinstr interrupt contexts + * and must be inlined to not get instrumentation. + */ +static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { __this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1); } -static inline void kvm_clear_cpu_l1tf_flush_l1d(void) +static __always_inline void kvm_clear_cpu_l1tf_flush_l1d(void) { __this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 0); } -static inline bool kvm_get_cpu_l1tf_flush_l1d(void) +static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void) { return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d); } #else /* !IS_ENABLED(CONFIG_KVM_INTEL) */ -static inline void kvm_set_cpu_l1tf_flush_l1d(void) { } +static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { } #endif /* IS_ENABLED(CONFIG_KVM_INTEL) */ #endif /* _ASM_X86_HARDIRQ_H */ diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index a8059930056d..585bdadba47d 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -23,10 +23,10 @@ #include <linux/interrupt.h> #include <linux/threads.h> -#include <asm/kmap_types.h> #include <asm/tlbflush.h> #include <asm/paravirt.h> #include <asm/fixmap.h> +#include <asm/pgtable_areas.h> /* declarations for highmem.c */ extern unsigned long highstart_pfn, highend_pfn; @@ -58,22 +58,16 @@ extern unsigned long highstart_pfn, highend_pfn; #define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) -extern void *kmap_high(struct page *page); -extern void kunmap_high(struct page *page); - -void *kmap(struct page *page); -void kunmap(struct page *page); - -void *kmap_atomic_prot(struct page *page, pgprot_t prot); -void *kmap_atomic(struct page *page); -void __kunmap_atomic(void *kvaddr); -void *kmap_atomic_pfn(unsigned long pfn); -void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); - #define flush_cache_kmaps() do { } while (0) -extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, - unsigned long end_pfn); +#define arch_kmap_local_post_map(vaddr, pteval) \ + arch_flush_lazy_mmu_mode() + +#define arch_kmap_local_post_unmap(vaddr) \ + do { \ + flush_tlb_one_kernel((vaddr)); \ + arch_flush_lazy_mmu_mode(); \ + } while (0) #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 67385d56d4f4..ab0c78855ecb 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -74,18 +74,6 @@ extern void hpet_disable(void); extern unsigned int hpet_readl(unsigned int a); extern void force_hpet_resume(void); -struct irq_data; -struct hpet_dev; -struct irq_domain; - -extern void hpet_msi_unmask(struct irq_data *data); -extern void hpet_msi_mask(struct irq_data *data); -extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg); -extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg); -extern struct irq_domain *hpet_create_irq_domain(int hpet_id); -extern int hpet_assign_irq(struct irq_domain *domain, - struct hpet_dev *dev, int dev_num); - #ifdef CONFIG_HPET_EMULATE_RTC #include <linux/interrupt.h> @@ -96,7 +84,6 @@ extern int hpet_set_rtc_irq_bit(unsigned long bit_mask); extern int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec); extern int hpet_set_periodic_freq(unsigned long freq); -extern int hpet_rtc_dropped_irq(void); extern int hpet_rtc_timer_init(void); extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id); extern int hpet_register_irq_handler(rtc_irq_handler handler); diff --git 
a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index 7469d321f072..1721b1aadeb1 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -7,18 +7,4 @@ #define hugepages_supported() boot_cpu_has(X86_FEATURE_PSE) -static inline int is_hugepage_only_range(struct mm_struct *mm, - unsigned long addr, - unsigned long len) { - return 0; -} - -static inline void arch_clear_hugepage_flags(struct page *page) -{ -} - -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -static inline bool gigantic_page_supported(void) { return true; } -#endif - #endif /* _ASM_X86_HUGETLB_H */ diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index a1f0e90d0818..0bc931cd0698 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -44,10 +44,7 @@ struct arch_hw_breakpoint { /* Total number of available HW breakpoint registers */ #define HBP_NUM 4 -static inline int hw_breakpoint_slots(int type) -{ - return HBP_NUM; -} +#define hw_breakpoint_slots(type) (HBP_NUM) struct perf_event_attr; struct perf_event; diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 32e666e1231e..cbe19e669080 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -16,9 +16,7 @@ #include <asm/irq_vectors.h> -#define IRQ_MATRIX_BITS NR_VECTORS - -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/percpu.h> #include <linux/profile.h> @@ -28,29 +26,7 @@ #include <asm/irq.h> #include <asm/sections.h> -/* Interrupt handlers registered during init_IRQ */ -extern asmlinkage void apic_timer_interrupt(void); -extern asmlinkage void x86_platform_ipi(void); -extern asmlinkage void kvm_posted_intr_ipi(void); -extern asmlinkage void kvm_posted_intr_wakeup_ipi(void); -extern asmlinkage void kvm_posted_intr_nested_ipi(void); -extern asmlinkage void error_interrupt(void); -extern asmlinkage void irq_work_interrupt(void); -extern asmlinkage void uv_bau_message_intr1(void); - -extern asmlinkage void spurious_interrupt(void); -extern asmlinkage void thermal_interrupt(void); -extern asmlinkage void reschedule_interrupt(void); - -extern asmlinkage void irq_move_cleanup_interrupt(void); -extern asmlinkage void reboot_interrupt(void); -extern asmlinkage void threshold_interrupt(void); -extern asmlinkage void deferred_error_interrupt(void); - -extern asmlinkage void call_function_interrupt(void); -extern asmlinkage void call_function_single_interrupt(void); - -#ifdef CONFIG_X86_LOCAL_APIC +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_data; struct pci_dev; struct msi_desc; @@ -58,61 +34,54 @@ struct msi_desc; enum irq_alloc_type { X86_IRQ_ALLOC_TYPE_IOAPIC = 1, X86_IRQ_ALLOC_TYPE_HPET, - X86_IRQ_ALLOC_TYPE_MSI, - X86_IRQ_ALLOC_TYPE_MSIX, + X86_IRQ_ALLOC_TYPE_PCI_MSI, + X86_IRQ_ALLOC_TYPE_PCI_MSIX, X86_IRQ_ALLOC_TYPE_DMAR, + X86_IRQ_ALLOC_TYPE_AMDVI, X86_IRQ_ALLOC_TYPE_UV, }; +struct ioapic_alloc_info { + int pin; + int node; + u32 is_level : 1; + u32 active_low : 1; + u32 valid : 1; +}; + +struct uv_alloc_info { + int limit; + int blade; + unsigned long offset; + char *name; + +}; + +/** + * irq_alloc_info - X86 specific interrupt allocation info + * @type: X86 specific allocation type + * @flags: Flags for allocation tweaks + * @devid: Device ID for allocations + * @hwirq: Associated hw interrupt number in the domain + * @mask: CPU mask for vector allocation + * @desc: Pointer to msi descriptor + * @data: Allocation specific data + * + * @ioapic: IOAPIC specific allocation data + * @uv: UV specific 
allocation data +*/ struct irq_alloc_info { enum irq_alloc_type type; u32 flags; - const struct cpumask *mask; /* CPU mask for vector allocation */ + u32 devid; + irq_hw_number_t hwirq; + const struct cpumask *mask; + struct msi_desc *desc; + void *data; + union { - int unused; -#ifdef CONFIG_HPET_TIMER - struct { - int hpet_id; - int hpet_index; - void *hpet_data; - }; -#endif -#ifdef CONFIG_PCI_MSI - struct { - struct pci_dev *msi_dev; - irq_hw_number_t msi_hwirq; - }; -#endif -#ifdef CONFIG_X86_IO_APIC - struct { - int ioapic_id; - int ioapic_pin; - int ioapic_node; - u32 ioapic_trigger : 1; - u32 ioapic_polarity : 1; - u32 ioapic_valid : 1; - struct IO_APIC_route_entry *ioapic_entry; - }; -#endif -#ifdef CONFIG_DMAR_TABLE - struct { - int dmar_id; - void *dmar_data; - }; -#endif -#ifdef CONFIG_X86_UV - struct { - int uv_limit; - int uv_blade; - unsigned long uv_offset; - char *uv_name; - }; -#endif -#if IS_ENABLED(CONFIG_VMD) - struct { - struct msi_desc *desc; - }; -#endif + struct ioapic_alloc_info ioapic; + struct uv_alloc_info uv; }; }; @@ -123,21 +92,23 @@ struct irq_cfg { extern struct irq_cfg *irq_cfg(unsigned int irq); extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); -extern void lock_vector_lock(void); -extern void unlock_vector_lock(void); #ifdef CONFIG_SMP -extern void send_cleanup_vector(struct irq_cfg *); +extern void vector_schedule_cleanup(struct irq_cfg *); extern void irq_complete_move(struct irq_cfg *cfg); #else -static inline void send_cleanup_vector(struct irq_cfg *c) { } +static inline void vector_schedule_cleanup(struct irq_cfg *c) { } static inline void irq_complete_move(struct irq_cfg *c) { } #endif - extern void apic_ack_edge(struct irq_data *data); -#else /* CONFIG_X86_LOCAL_APIC */ +#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ + +#ifdef CONFIG_X86_LOCAL_APIC +extern void lock_vector_lock(void); +extern void unlock_vector_lock(void); +#else static inline void lock_vector_lock(void) {} static inline void unlock_vector_lock(void) {} -#endif /* CONFIG_X86_LOCAL_APIC */ +#endif /* Statistics */ extern atomic_t irq_err_count; @@ -150,12 +121,15 @@ extern char irq_entries_start[]; #define trace_irq_entries_start irq_entries_start #endif +extern char spurious_entries_start[]; + #define VECTOR_UNUSED NULL -#define VECTOR_RETRIGGERED ((void *)~0UL) +#define VECTOR_SHUTDOWN ((void *)-1L) +#define VECTOR_RETRIGGERED ((void *)-2L) typedef struct irq_desc* vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); -#endif /* !ASSEMBLY_ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_HW_IRQ_H */ diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h deleted file mode 100644 index 705dafc2d11a..000000000000 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ /dev/null @@ -1,869 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ - -/* - * This file contains definitions from Hyper-V Hypervisor Top-Level Functional - * Specification (TLFS): - * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs - */ - -#ifndef _ASM_X86_HYPERV_TLFS_H -#define _ASM_X86_HYPERV_TLFS_H - -#include <linux/types.h> -#include <asm/page.h> - -/* - * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent - * is set by CPUID(HvCpuIdFunctionVersionAndFeatures). 
- */ -#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 -#define HYPERV_CPUID_INTERFACE 0x40000001 -#define HYPERV_CPUID_VERSION 0x40000002 -#define HYPERV_CPUID_FEATURES 0x40000003 -#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 -#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 -#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A - -#define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 -#define HYPERV_CPUID_MIN 0x40000005 -#define HYPERV_CPUID_MAX 0x4000ffff - -/* - * Feature identification. EAX indicates which features are available - * to the partition based upon the current partition privileges. - * These are HYPERV_CPUID_FEATURES.EAX bits. - */ - -/* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */ -#define HV_X64_MSR_VP_RUNTIME_AVAILABLE BIT(0) -/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ -#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1) -/* - * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM - * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available - */ -#define HV_X64_MSR_SYNIC_AVAILABLE BIT(2) -/* - * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through - * HV_X64_MSR_STIMER3_COUNT) available - */ -#define HV_MSR_SYNTIMER_AVAILABLE BIT(3) -/* - * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR) - * are available - */ -#define HV_X64_MSR_APIC_ACCESS_AVAILABLE BIT(4) -/* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/ -#define HV_X64_MSR_HYPERCALL_AVAILABLE BIT(5) -/* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/ -#define HV_X64_MSR_VP_INDEX_AVAILABLE BIT(6) -/* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/ -#define HV_X64_MSR_RESET_AVAILABLE BIT(7) -/* - * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE, - * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE, - * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available - */ -#define HV_X64_MSR_STAT_PAGES_AVAILABLE BIT(8) -/* Partition reference TSC MSR is available */ -#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9) -/* Partition Guest IDLE MSR is available */ -#define HV_X64_MSR_GUEST_IDLE_AVAILABLE BIT(10) -/* - * There is a single feature flag that signifies if the partition has access - * to MSRs with local APIC and TSC frequencies. - */ -#define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11) -/* AccessReenlightenmentControls privilege */ -#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13) - -/* - * Feature identification: indicates which flags were specified at partition - * creation. The format is the same as the partition creation flag structure - * defined in section Partition Creation Flags. - * These are HYPERV_CPUID_FEATURES.EBX bits. - */ -#define HV_X64_CREATE_PARTITIONS BIT(0) -#define HV_X64_ACCESS_PARTITION_ID BIT(1) -#define HV_X64_ACCESS_MEMORY_POOL BIT(2) -#define HV_X64_ADJUST_MESSAGE_BUFFERS BIT(3) -#define HV_X64_POST_MESSAGES BIT(4) -#define HV_X64_SIGNAL_EVENTS BIT(5) -#define HV_X64_CREATE_PORT BIT(6) -#define HV_X64_CONNECT_PORT BIT(7) -#define HV_X64_ACCESS_STATS BIT(8) -#define HV_X64_DEBUGGING BIT(11) -#define HV_X64_CPU_POWER_MANAGEMENT BIT(12) - -/* - * Feature identification. EDX indicates which miscellaneous features - * are available to the partition. - * These are HYPERV_CPUID_FEATURES.EDX bits. 
- */ -/* The MWAIT instruction is available (per section MONITOR / MWAIT) */ -#define HV_X64_MWAIT_AVAILABLE BIT(0) -/* Guest debugging support is available */ -#define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1) -/* Performance Monitor support is available*/ -#define HV_X64_PERF_MONITOR_AVAILABLE BIT(2) -/* Support for physical CPU dynamic partitioning events is available*/ -#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3) -/* - * Support for passing hypercall input parameter block via XMM - * registers is available - */ -#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE BIT(4) -/* Support for a virtual guest idle state is available */ -#define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5) -/* Frequency MSRs available */ -#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8) -/* Crash MSR available */ -#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) -/* stimer Direct Mode is available */ -#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19) - -/* - * Implementation recommendations. Indicates which behaviors the hypervisor - * recommends the OS implement for optimal performance. - * These are HYPERV_CPUID_ENLIGHTMENT_INFO.EAX bits. - */ -/* - * Recommend using hypercall for address space switches rather - * than MOV to CR3 instruction - */ -#define HV_X64_AS_SWITCH_RECOMMENDED BIT(0) -/* Recommend using hypercall for local TLB flushes rather - * than INVLPG or MOV to CR3 instructions */ -#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED BIT(1) -/* - * Recommend using hypercall for remote TLB flushes rather - * than inter-processor interrupts - */ -#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED BIT(2) -/* - * Recommend using MSRs for accessing APIC registers - * EOI, ICR and TPR rather than their memory-mapped counterparts - */ -#define HV_X64_APIC_ACCESS_RECOMMENDED BIT(3) -/* Recommend using the hypervisor-provided MSR to initiate a system RESET */ -#define HV_X64_SYSTEM_RESET_RECOMMENDED BIT(4) -/* - * Recommend using relaxed timing for this partition. If used, - * the VM should disable any watchdog timeouts that rely on the - * timely delivery of external interrupts - */ -#define HV_X64_RELAXED_TIMING_RECOMMENDED BIT(5) - -/* - * Recommend not using Auto End-Of-Interrupt feature - */ -#define HV_DEPRECATING_AEOI_RECOMMENDED BIT(9) - -/* - * Recommend using cluster IPI hypercalls. - */ -#define HV_X64_CLUSTER_IPI_RECOMMENDED BIT(10) - -/* Recommend using the newer ExProcessorMasks interface */ -#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11) - -/* Recommend using enlightened VMCS */ -#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) - -/* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */ -#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18) -#define HV_X64_NESTED_MSR_BITMAP BIT(19) - -/* Hyper-V specific model specific registers (MSRs) */ - -/* MSR used to identify the guest OS. */ -#define HV_X64_MSR_GUEST_OS_ID 0x40000000 - -/* MSR used to setup pages used to communicate with the hypervisor. */ -#define HV_X64_MSR_HYPERCALL 0x40000001 - -/* MSR used to provide vcpu index */ -#define HV_X64_MSR_VP_INDEX 0x40000002 - -/* MSR used to reset the guest OS. 
*/ -#define HV_X64_MSR_RESET 0x40000003 - -/* MSR used to provide vcpu runtime in 100ns units */ -#define HV_X64_MSR_VP_RUNTIME 0x40000010 - -/* MSR used to read the per-partition time reference counter */ -#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 - -/* A partition's reference time stamp counter (TSC) page */ -#define HV_X64_MSR_REFERENCE_TSC 0x40000021 - -/* MSR used to retrieve the TSC frequency */ -#define HV_X64_MSR_TSC_FREQUENCY 0x40000022 - -/* MSR used to retrieve the local APIC timer frequency */ -#define HV_X64_MSR_APIC_FREQUENCY 0x40000023 - -/* Define the virtual APIC registers */ -#define HV_X64_MSR_EOI 0x40000070 -#define HV_X64_MSR_ICR 0x40000071 -#define HV_X64_MSR_TPR 0x40000072 -#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 - -/* Define synthetic interrupt controller model specific registers. */ -#define HV_X64_MSR_SCONTROL 0x40000080 -#define HV_X64_MSR_SVERSION 0x40000081 -#define HV_X64_MSR_SIEFP 0x40000082 -#define HV_X64_MSR_SIMP 0x40000083 -#define HV_X64_MSR_EOM 0x40000084 -#define HV_X64_MSR_SINT0 0x40000090 -#define HV_X64_MSR_SINT1 0x40000091 -#define HV_X64_MSR_SINT2 0x40000092 -#define HV_X64_MSR_SINT3 0x40000093 -#define HV_X64_MSR_SINT4 0x40000094 -#define HV_X64_MSR_SINT5 0x40000095 -#define HV_X64_MSR_SINT6 0x40000096 -#define HV_X64_MSR_SINT7 0x40000097 -#define HV_X64_MSR_SINT8 0x40000098 -#define HV_X64_MSR_SINT9 0x40000099 -#define HV_X64_MSR_SINT10 0x4000009A -#define HV_X64_MSR_SINT11 0x4000009B -#define HV_X64_MSR_SINT12 0x4000009C -#define HV_X64_MSR_SINT13 0x4000009D -#define HV_X64_MSR_SINT14 0x4000009E -#define HV_X64_MSR_SINT15 0x4000009F - -/* - * Synthetic Timer MSRs. Four timers per vcpu. - */ -#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0 -#define HV_X64_MSR_STIMER0_COUNT 0x400000B1 -#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2 -#define HV_X64_MSR_STIMER1_COUNT 0x400000B3 -#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4 -#define HV_X64_MSR_STIMER2_COUNT 0x400000B5 -#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 -#define HV_X64_MSR_STIMER3_COUNT 0x400000B7 - -/* Hyper-V guest idle MSR */ -#define HV_X64_MSR_GUEST_IDLE 0x400000F0 - -/* Hyper-V guest crash notification MSR's */ -#define HV_X64_MSR_CRASH_P0 0x40000100 -#define HV_X64_MSR_CRASH_P1 0x40000101 -#define HV_X64_MSR_CRASH_P2 0x40000102 -#define HV_X64_MSR_CRASH_P3 0x40000103 -#define HV_X64_MSR_CRASH_P4 0x40000104 -#define HV_X64_MSR_CRASH_CTL 0x40000105 - -/* TSC emulation after migration */ -#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 -#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 -#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 - -/* - * Declare the MSR used to setup pages used to communicate with the hypervisor. - */ -union hv_x64_msr_hypercall_contents { - u64 as_uint64; - struct { - u64 enable:1; - u64 reserved:11; - u64 guest_physical_address:52; - } __packed; -}; - -/* - * TSC page layout. - */ -struct ms_hyperv_tsc_page { - volatile u32 tsc_sequence; - u32 reserved1; - volatile u64 tsc_scale; - volatile s64 tsc_offset; - u64 reserved2[509]; -} __packed; - -/* - * The guest OS needs to register the guest ID with the hypervisor. - * The guest ID is a 64 bit entity and the structure of this ID is - * specified in the Hyper-V specification: - * - * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx - * - * While the current guideline does not specify how Linux guest ID(s) - * need to be generated, our plan is to publish the guidelines for - * Linux and other guest operating systems that currently are hosted - * on Hyper-V. 
The implementation here conforms to these
- * yet-unpublished guidelines.
- *
- *
- * Bit(s)
- * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
- * 62:56 - Os Type; Linux is 0x100
- * 55:48 - Distro specific identification
- * 47:16 - Linux kernel version number
- * 15:0 - Distro specific identification
- *
- *
- */
-
-#define HV_LINUX_VENDOR_ID 0x8100
-
-struct hv_reenlightenment_control {
- __u64 vector:8;
- __u64 reserved1:8;
- __u64 enabled:1;
- __u64 reserved2:15;
- __u64 target_vp:32;
-} __packed;
-
-struct hv_tsc_emulation_control {
- __u64 enabled:1;
- __u64 reserved:63;
-} __packed;
-
-struct hv_tsc_emulation_status {
- __u64 inprogress:1;
- __u64 reserved:63;
-} __packed;
-
-#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
-#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12
-#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
- (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
-
-/*
- * Crash notification (HV_X64_MSR_CRASH_CTL) flags.
- */
-#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62)
-#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63)
-#define HV_X64_MSR_CRASH_PARAMS \
- (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
-
-#define HV_IPI_LOW_VECTOR 0x10
-#define HV_IPI_HIGH_VECTOR 0xff
-
-/* Declare the various hypercall operations. */
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
-#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
-#define HVCALL_SEND_IPI 0x000b
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
-#define HVCALL_SEND_IPI_EX 0x0015
-#define HVCALL_POST_MESSAGE 0x005c
-#define HVCALL_SIGNAL_EVENT 0x005d
-#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
-#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
-
-#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
- (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
-
-/* Hyper-V Enlightened VMCS version mask in nested features CPUID */
-#define HV_X64_ENLIGHTENED_VMCS_VERSION 0xff
-
-#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001
-#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12
-
-#define HV_PROCESSOR_POWER_STATE_C0 0
-#define HV_PROCESSOR_POWER_STATE_C1 1
-#define HV_PROCESSOR_POWER_STATE_C2 2
-#define HV_PROCESSOR_POWER_STATE_C3 3
-
-#define HV_FLUSH_ALL_PROCESSORS BIT(0)
-#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
-#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
-#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
-
-enum HV_GENERIC_SET_FORMAT {
- HV_GENERIC_SET_SPARSE_4K,
- HV_GENERIC_SET_ALL,
-};
-
-#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0)
-#define HV_HYPERCALL_FAST_BIT BIT(16)
-#define HV_HYPERCALL_VARHEAD_OFFSET 17
-#define HV_HYPERCALL_REP_COMP_OFFSET 32
-#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
-#define HV_HYPERCALL_REP_START_OFFSET 48
-#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48)
-
-/* hypercall status code */
-#define HV_STATUS_SUCCESS 0
-#define HV_STATUS_INVALID_HYPERCALL_CODE 2
-#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
-#define HV_STATUS_INVALID_ALIGNMENT 4
-#define HV_STATUS_INVALID_PARAMETER 5
-#define HV_STATUS_INSUFFICIENT_MEMORY 11
-#define HV_STATUS_INVALID_PORT_ID 17
-#define HV_STATUS_INVALID_CONNECTION_ID 18
-#define HV_STATUS_INSUFFICIENT_BUFFERS 19
-
-typedef struct _HV_REFERENCE_TSC_PAGE {
- __u32 tsc_sequence;
- __u32 res1;
- __u64 tsc_scale;
- __s64 tsc_offset;
-} __packed HV_REFERENCE_TSC_PAGE,
*PHV_REFERENCE_TSC_PAGE; - -/* Define the number of synthetic interrupt sources. */ -#define HV_SYNIC_SINT_COUNT (16) -/* Define the expected SynIC version. */ -#define HV_SYNIC_VERSION_1 (0x1) -/* Valid SynIC vectors are 16-255. */ -#define HV_SYNIC_FIRST_VALID_VECTOR (16) - -#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) -#define HV_SYNIC_SIMP_ENABLE (1ULL << 0) -#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0) -#define HV_SYNIC_SINT_MASKED (1ULL << 16) -#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) -#define HV_SYNIC_SINT_VECTOR_MASK (0xFF) - -#define HV_SYNIC_STIMER_COUNT (4) - -/* Define synthetic interrupt controller message constants. */ -#define HV_MESSAGE_SIZE (256) -#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) -#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) - -/* Define hypervisor message types. */ -enum hv_message_type { - HVMSG_NONE = 0x00000000, - - /* Memory access messages. */ - HVMSG_UNMAPPED_GPA = 0x80000000, - HVMSG_GPA_INTERCEPT = 0x80000001, - - /* Timer notification messages. */ - HVMSG_TIMER_EXPIRED = 0x80000010, - - /* Error messages. */ - HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, - HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, - HVMSG_UNSUPPORTED_FEATURE = 0x80000022, - - /* Trace buffer complete messages. */ - HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, - - /* Platform-specific processor intercept messages. */ - HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, - HVMSG_X64_MSR_INTERCEPT = 0x80010001, - HVMSG_X64_CPUID_INTERCEPT = 0x80010002, - HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, - HVMSG_X64_APIC_EOI = 0x80010004, - HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 -}; - -/* Define synthetic interrupt controller message flags. */ -union hv_message_flags { - __u8 asu8; - struct { - __u8 msg_pending:1; - __u8 reserved:7; - } __packed; -}; - -/* Define port identifier type. */ -union hv_port_id { - __u32 asu32; - struct { - __u32 id:24; - __u32 reserved:8; - } __packed u; -}; - -/* Define synthetic interrupt controller message header. */ -struct hv_message_header { - __u32 message_type; - __u8 payload_size; - union hv_message_flags message_flags; - __u8 reserved[2]; - union { - __u64 sender; - union hv_port_id port; - }; -} __packed; - -/* Define synthetic interrupt controller message format. */ -struct hv_message { - struct hv_message_header header; - union { - __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; - } u; -} __packed; - -/* Define the synthetic interrupt message page layout. */ -struct hv_message_page { - struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; -} __packed; - -/* Define timer message payload structure. */ -struct hv_timer_message_payload { - __u32 timer_index; - __u32 reserved; - __u64 expiration_time; /* When the timer expired */ - __u64 delivery_time; /* When the message was delivered */ -} __packed; - -/* Define virtual processor assist page structure. 
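Stepping back to the SynIC message page defined above: consumers such as VMBus drain a SINT slot with a read, clear, EOM sequence so the hypervisor can deliver any pended message. A minimal sketch modeled on the VMBus interrupt path; msg_page, sint and handle_payload() are placeholders of this example, not part of the header:

	struct hv_message *msg = &msg_page->sint_message[sint];

	if (READ_ONCE(msg->header.message_type) != HVMSG_NONE) {
		handle_payload(msg->u.payload, msg->header.payload_size);

		WRITE_ONCE(msg->header.message_type, HVMSG_NONE);
		mb();	/* empty the slot before checking msg_pending */
		if (msg->header.message_flags.msg_pending)
			wrmsrl(HV_X64_MSR_EOM, 0);
	}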
*/ -struct hv_vp_assist_page { - __u32 apic_assist; - __u32 reserved; - __u64 vtl_control[2]; - __u64 nested_enlightenments_control[2]; - __u32 enlighten_vmentry; - __u32 padding; - __u64 current_nested_vmcs; -} __packed; - -struct hv_enlightened_vmcs { - u32 revision_id; - u32 abort; - - u16 host_es_selector; - u16 host_cs_selector; - u16 host_ss_selector; - u16 host_ds_selector; - u16 host_fs_selector; - u16 host_gs_selector; - u16 host_tr_selector; - - u16 padding16_1; - - u64 host_ia32_pat; - u64 host_ia32_efer; - - u64 host_cr0; - u64 host_cr3; - u64 host_cr4; - - u64 host_ia32_sysenter_esp; - u64 host_ia32_sysenter_eip; - u64 host_rip; - u32 host_ia32_sysenter_cs; - - u32 pin_based_vm_exec_control; - u32 vm_exit_controls; - u32 secondary_vm_exec_control; - - u64 io_bitmap_a; - u64 io_bitmap_b; - u64 msr_bitmap; - - u16 guest_es_selector; - u16 guest_cs_selector; - u16 guest_ss_selector; - u16 guest_ds_selector; - u16 guest_fs_selector; - u16 guest_gs_selector; - u16 guest_ldtr_selector; - u16 guest_tr_selector; - - u32 guest_es_limit; - u32 guest_cs_limit; - u32 guest_ss_limit; - u32 guest_ds_limit; - u32 guest_fs_limit; - u32 guest_gs_limit; - u32 guest_ldtr_limit; - u32 guest_tr_limit; - u32 guest_gdtr_limit; - u32 guest_idtr_limit; - - u32 guest_es_ar_bytes; - u32 guest_cs_ar_bytes; - u32 guest_ss_ar_bytes; - u32 guest_ds_ar_bytes; - u32 guest_fs_ar_bytes; - u32 guest_gs_ar_bytes; - u32 guest_ldtr_ar_bytes; - u32 guest_tr_ar_bytes; - - u64 guest_es_base; - u64 guest_cs_base; - u64 guest_ss_base; - u64 guest_ds_base; - u64 guest_fs_base; - u64 guest_gs_base; - u64 guest_ldtr_base; - u64 guest_tr_base; - u64 guest_gdtr_base; - u64 guest_idtr_base; - - u64 padding64_1[3]; - - u64 vm_exit_msr_store_addr; - u64 vm_exit_msr_load_addr; - u64 vm_entry_msr_load_addr; - - u64 cr3_target_value0; - u64 cr3_target_value1; - u64 cr3_target_value2; - u64 cr3_target_value3; - - u32 page_fault_error_code_mask; - u32 page_fault_error_code_match; - - u32 cr3_target_count; - u32 vm_exit_msr_store_count; - u32 vm_exit_msr_load_count; - u32 vm_entry_msr_load_count; - - u64 tsc_offset; - u64 virtual_apic_page_addr; - u64 vmcs_link_pointer; - - u64 guest_ia32_debugctl; - u64 guest_ia32_pat; - u64 guest_ia32_efer; - - u64 guest_pdptr0; - u64 guest_pdptr1; - u64 guest_pdptr2; - u64 guest_pdptr3; - - u64 guest_pending_dbg_exceptions; - u64 guest_sysenter_esp; - u64 guest_sysenter_eip; - - u32 guest_activity_state; - u32 guest_sysenter_cs; - - u64 cr0_guest_host_mask; - u64 cr4_guest_host_mask; - u64 cr0_read_shadow; - u64 cr4_read_shadow; - u64 guest_cr0; - u64 guest_cr3; - u64 guest_cr4; - u64 guest_dr7; - - u64 host_fs_base; - u64 host_gs_base; - u64 host_tr_base; - u64 host_gdtr_base; - u64 host_idtr_base; - u64 host_rsp; - - u64 ept_pointer; - - u16 virtual_processor_id; - u16 padding16_2[3]; - - u64 padding64_2[5]; - u64 guest_physical_address; - - u32 vm_instruction_error; - u32 vm_exit_reason; - u32 vm_exit_intr_info; - u32 vm_exit_intr_error_code; - u32 idt_vectoring_info_field; - u32 idt_vectoring_error_code; - u32 vm_exit_instruction_len; - u32 vmx_instruction_info; - - u64 exit_qualification; - u64 exit_io_instruction_ecx; - u64 exit_io_instruction_esi; - u64 exit_io_instruction_edi; - u64 exit_io_instruction_eip; - - u64 guest_linear_address; - u64 guest_rsp; - u64 guest_rflags; - - u32 guest_interruptibility_info; - u32 cpu_based_vm_exec_control; - u32 exception_bitmap; - u32 vm_entry_controls; - u32 vm_entry_intr_info_field; - u32 vm_entry_exception_error_code; - u32 vm_entry_instruction_len; 
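	/*
	 * Usage note (see the HV_VMX_ENLIGHTENED_CLEAN_FIELD_* bits below):
	 * after software writes a field it must clear the clean bit for that
	 * field's group, e.g. for the CR/DR group
	 *
	 *	evmcs->guest_cr0 = val;
	 *	evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
	 *
	 * otherwise the hypervisor is free to reuse its cached copy on the
	 * next VM entry.
	 */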
- u32 tpr_threshold; - - u64 guest_rip; - - u32 hv_clean_fields; - u32 hv_padding_32; - u32 hv_synthetic_controls; - struct { - u32 nested_flush_hypercall:1; - u32 msr_bitmap:1; - u32 reserved:30; - } __packed hv_enlightenments_control; - u32 hv_vp_id; - - u64 hv_vm_id; - u64 partition_assist_page; - u64 padding64_4[4]; - u64 guest_bndcfgs; - u64 padding64_5[7]; - u64 xss_exit_bitmap; - u64 padding64_6[7]; -} __packed; - -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0 -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15) - -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF - -/* Define synthetic interrupt controller flag constants. */ -#define HV_EVENT_FLAGS_COUNT (256 * 8) -#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long)) - -/* - * Synthetic timer configuration. - */ -union hv_stimer_config { - u64 as_uint64; - struct { - u64 enable:1; - u64 periodic:1; - u64 lazy:1; - u64 auto_enable:1; - u64 apic_vector:8; - u64 direct_mode:1; - u64 reserved_z0:3; - u64 sintx:4; - u64 reserved_z1:44; - } __packed; -}; - - -/* Define the synthetic interrupt controller event flags format. */ -union hv_synic_event_flags { - unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT]; -}; - -/* Define SynIC control register. */ -union hv_synic_scontrol { - u64 as_uint64; - struct { - u64 enable:1; - u64 reserved:63; - } __packed; -}; - -/* Define synthetic interrupt source. */ -union hv_synic_sint { - u64 as_uint64; - struct { - u64 vector:8; - u64 reserved1:8; - u64 masked:1; - u64 auto_eoi:1; - u64 reserved2:46; - } __packed; -}; - -/* Define the format of the SIMP register */ -union hv_synic_simp { - u64 as_uint64; - struct { - u64 simp_enabled:1; - u64 preserved:11; - u64 base_simp_gpa:52; - } __packed; -}; - -/* Define the format of the SIEFP register */ -union hv_synic_siefp { - u64 as_uint64; - struct { - u64 siefp_enabled:1; - u64 preserved:11; - u64 base_siefp_gpa:52; - } __packed; -}; - -struct hv_vpset { - u64 format; - u64 valid_bank_mask; - u64 bank_contents[]; -} __packed; - -/* HvCallSendSyntheticClusterIpi hypercall */ -struct hv_send_ipi { - u32 vector; - u32 reserved; - u64 cpu_mask; -} __packed; - -/* HvCallSendSyntheticClusterIpiEx hypercall */ -struct hv_send_ipi_ex { - u32 vector; - u32 reserved; - struct hv_vpset vp_set; -} __packed; - -/* HvFlushGuestPhysicalAddressSpace hypercalls */ -struct hv_guest_mapping_flush { - u64 address_space; - u64 flags; -} __packed; - -/* - * HV_MAX_FLUSH_PAGES = "additional_pages" + 1. It's limited - * by the bitwidth of "additional_pages" in union hv_gpa_page_range. 
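 *
 * Concretely: additional_pages is 11 bits wide, so one entry covers at
 * most 1 + 2047 = 2048 pages, i.e. 8M of address space at 4K page size.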
- */
-#define HV_MAX_FLUSH_PAGES (2048)
-
-/* HvFlushGuestPhysicalAddressList hypercall */
-union hv_gpa_page_range {
- u64 address_space;
- struct {
- u64 additional_pages:11;
- u64 largepage:1;
- u64 basepfn:52;
- } page;
-};
-
-/*
- * All input flush parameters should be in a single page. The max flush
- * count is equal to how many entries of union hv_gpa_page_range can
- * be populated into the input parameter page.
- */
-#define HV_MAX_FLUSH_REP_COUNT ((PAGE_SIZE - 2 * sizeof(u64)) / \
- sizeof(union hv_gpa_page_range))
-
-struct hv_guest_mapping_flush_list {
- u64 address_space;
- u64 flags;
- union hv_gpa_page_range gpa_list[HV_MAX_FLUSH_REP_COUNT];
-};
-
-/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
-struct hv_tlb_flush {
- u64 address_space;
- u64 flags;
- u64 processor_mask;
- u64 gva_list[];
-} __packed;
-
-/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
-struct hv_tlb_flush_ex {
- u64 address_space;
- u64 flags;
- struct hv_vpset hv_vp_set;
- u64 gva_list[];
-} __packed;
-
-#endif
diff --git a/arch/x86/include/asm/hyperv_timer.h b/arch/x86/include/asm/hyperv_timer.h
new file mode 100644
index 000000000000..388fa81b8f38
--- /dev/null
+++ b/arch/x86/include/asm/hyperv_timer.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_HYPERV_TIMER_H
+#define _ASM_X86_HYPERV_TIMER_H
+
+#include <asm/msr.h>
+
+#define hv_get_raw_timer() rdtsc_ordered()
+
+#endif
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 8c5aaba6633f..9ad86a7d13f6 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -29,6 +29,8 @@ enum x86_hypervisor_type {
 X86_HYPER_XEN_HVM,
 X86_HYPER_KVM,
 X86_HYPER_JAILHOUSE,
+ X86_HYPER_ACRN,
+ X86_HYPER_BHYVE,
 };
 #ifdef CONFIG_HYPERVISOR_GUEST
@@ -52,8 +54,21 @@ struct hypervisor_x86 {
 /* runtime callbacks */
 struct x86_hyper_runtime runtime;
+
+ /* ignore nopv parameter */
+ bool ignore_nopv;
 };
+extern const struct hypervisor_x86 x86_hyper_vmware;
+extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
+extern const struct hypervisor_x86 x86_hyper_xen_pv;
+extern const struct hypervisor_x86 x86_hyper_kvm;
+extern const struct hypervisor_x86 x86_hyper_jailhouse;
+extern const struct hypervisor_x86 x86_hyper_acrn;
+extern const struct hypervisor_x86 x86_hyper_bhyve;
+extern struct hypervisor_x86 x86_hyper_xen_hvm;
+
+extern bool nopv;
 extern enum x86_hypervisor_type x86_hyper_type;
 extern void init_hypervisor_platform(void);
 static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 89789e8c80f6..c715097e92fd 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -19,6 +19,8 @@ extern unsigned int cached_irq_mask;
 #define PIC_MASTER_OCW3 PIC_MASTER_ISR
 #define PIC_SLAVE_CMD 0xa0
 #define PIC_SLAVE_IMR 0xa1
+#define PIC_ELCR1 0x4d0
+#define PIC_ELCR2 0x4d1
 /* i8259A PIC related value */
 #define PIC_CASCADE_IR 2
@@ -67,6 +69,8 @@ struct legacy_pic {
 void (*make_irq)(unsigned int irq);
 };
+void legacy_pic_pcat_compat(void);
+
 extern struct legacy_pic *legacy_pic;
 extern struct legacy_pic null_legacy_pic;
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 2c5f7861d373..9d69f3f8dbab 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -2,7 +2,6 @@
 #ifndef _ASM_X86_IA32_H
 #define _ASM_X86_IA32_H
-
 #ifdef CONFIG_IA32_EMULATION
 #include <linux/compat.h>
@@ -57,17 +56,37 @@
struct stat64 { unsigned long long st_ino; } __attribute__((packed)); -#define IA32_STACK_TOP IA32_PAGE_OFFSET +extern bool __ia32_enabled; + +static __always_inline bool ia32_enabled(void) +{ + return __ia32_enabled; +} + +static inline void ia32_disable(void) +{ + __ia32_enabled = false; +} + +#else /* !CONFIG_IA32_EMULATION */ -#ifdef __KERNEL__ -struct linux_binprm; -extern int ia32_setup_arg_pages(struct linux_binprm *bprm, - unsigned long stack_top, int exec_stack); -struct mm_struct; -extern void ia32_pick_mmap_layout(struct mm_struct *mm); +static __always_inline bool ia32_enabled(void) +{ + return IS_ENABLED(CONFIG_X86_32); +} + +static inline void ia32_disable(void) {} #endif -#endif /* !CONFIG_IA32_SUPPORT */ +static inline bool ia32_enabled_verbose(void) +{ + bool enabled = ia32_enabled(); + + if (IS_ENABLED(CONFIG_IA32_EMULATION) && !enabled) + pr_notice_once("32-bit emulation disabled. You can reenable with ia32_emulation=on\n"); + + return enabled; +} #endif /* _ASM_X86_IA32_H */ diff --git a/arch/x86/include/asm/ia32_unistd.h b/arch/x86/include/asm/ia32_unistd.h deleted file mode 100644 index aa065c94ccf5..000000000000 --- a/arch/x86/include/asm/ia32_unistd.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_IA32_UNISTD_H -#define _ASM_X86_IA32_UNISTD_H - -/* - * This file contains the system call numbers of the ia32 compat ABI, - * this is for the kernel only. - */ -#define __SYSCALL_ia32_NR(x) (x) -#include <asm/unistd_32_ia32.h> - -#endif /* _ASM_X86_IA32_UNISTD_H */ diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h new file mode 100644 index 000000000000..5e45d6424722 --- /dev/null +++ b/arch/x86/include/asm/ibt.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IBT_H +#define _ASM_X86_IBT_H + +#include <linux/types.h> + +/* + * The rules for enabling IBT are: + * + * - CC_HAS_IBT: the toolchain supports it + * - X86_KERNEL_IBT: it is selected in Kconfig + * - !__DISABLE_EXPORTS: this is regular kernel code + * + * Esp. that latter one is a bit non-obvious, but some code like compressed, + * purgatory, realmode etc.. is built with custom CFLAGS that do not include + * -fcf-protection=branch and things will go *bang*. + * + * When all the above are satisfied, HAS_KERNEL_IBT will be 1, otherwise 0. + */ +#if defined(CONFIG_X86_KERNEL_IBT) && !defined(__DISABLE_EXPORTS) + +#define HAS_KERNEL_IBT 1 + +#ifndef __ASSEMBLER__ + +#ifdef CONFIG_X86_64 +#define ASM_ENDBR "endbr64\n\t" +#else +#define ASM_ENDBR "endbr32\n\t" +#endif + +#define __noendbr __attribute__((nocf_check)) + +/* + * Create a dummy function pointer reference to prevent objtool from marking + * the function as needing to be "sealed" (i.e. ENDBR converted to NOP by + * apply_seal_endbr()). + */ +#define IBT_NOSEAL(fname) \ + ".pushsection .discard.ibt_endbr_noseal\n\t" \ + _ASM_PTR fname "\n\t" \ + ".popsection\n\t" + +static __always_inline __attribute_const__ u32 gen_endbr(void) +{ + u32 endbr; + + /* + * Generate ENDBR64 in a way that is sure to not result in + * an ENDBR64 instruction as immediate. + */ + asm ( "mov $~0xfa1e0ff3, %[endbr]\n\t" + "not %[endbr]\n\t" + : [endbr] "=&r" (endbr) ); + + return endbr; +} + +static __always_inline __attribute_const__ u32 gen_endbr_poison(void) +{ + /* + * 4 byte NOP that isn't NOP4, such that it will be unique to (former) + * ENDBR sites. Additionally it carries UDB as immediate. 
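 *
 * (Byte-wise this is 0f 1f 40 d6: NOPL with ModRM 0x40, the disp8 form
 * of (%rax), whose displacement byte 0xd6 is both -42 and the UDB
 * opcode mentioned above.)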
+ */ + return 0xd6401f0f; /* nopl -42(%rax) */ +} + +static inline bool __is_endbr(u32 val) +{ + if (val == gen_endbr_poison()) + return true; + + val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */ + return val == gen_endbr(); +} + +extern __noendbr bool is_endbr(u32 *val); +extern __noendbr u64 ibt_save(bool disable); +extern __noendbr void ibt_restore(u64 save); + +#else /* __ASSEMBLER__ */ + +#ifdef CONFIG_X86_64 +#define ENDBR endbr64 +#else +#define ENDBR endbr32 +#endif + +#endif /* __ASSEMBLER__ */ + +#else /* !IBT */ + +#define HAS_KERNEL_IBT 0 + +#ifndef __ASSEMBLER__ + +#define ASM_ENDBR +#define IBT_NOSEAL(name) + +#define __noendbr + +static inline bool is_endbr(u32 *val) { return false; } + +static inline u64 ibt_save(bool disable) { return 0; } +static inline void ibt_restore(u64 save) { } + +#else /* __ASSEMBLER__ */ + +#define ENDBR + +#endif /* __ASSEMBLER__ */ + +#endif /* CONFIG_X86_KERNEL_IBT */ + +#define ENDBR_INSN_SIZE (4*HAS_KERNEL_IBT) + +#endif /* _ASM_X86_IBT_H */ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h new file mode 100644 index 000000000000..3218770670d3 --- /dev/null +++ b/arch/x86/include/asm/idtentry.h @@ -0,0 +1,775 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IDTENTRY_H +#define _ASM_X86_IDTENTRY_H + +/* Interrupts/Exceptions */ +#include <asm/trapnr.h> + +#define IDT_ALIGN (8 * (1 + HAS_KERNEL_IBT)) + +#ifndef __ASSEMBLER__ +#include <linux/entry-common.h> +#include <linux/hardirq.h> + +#include <asm/irq_stack.h> + +typedef void (*idtentry_t)(struct pt_regs *regs); + +/** + * DECLARE_IDTENTRY - Declare functions for simple IDT entry points + * No error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares four functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the FRED event dispatcher (maybe unused) + * - The C handler called from the ASM entry point + * + * Note: This is the C variant of DECLARE_IDTENTRY(). As the name says it + * declares the entry points for usage in C code. There is an ASM variant + * as well which is used to emit the entry stubs in entry_32/64.S. + */ +#define DECLARE_IDTENTRY(vector, func) \ + asmlinkage void asm_##func(void); \ + asmlinkage void xen_asm_##func(void); \ + void fred_##func(struct pt_regs *regs); \ + __visible void func(struct pt_regs *regs) + +/** + * DEFINE_IDTENTRY - Emit code for simple IDT entry points + * @func: Function name of the entry point + * + * @func is called from ASM entry code with interrupts disabled. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + * + * irqentry_enter() contains common code which has to be invoked before + * arbitrary code in the body. irqentry_exit() contains common code + * which has to run before returning to the low level assembly code. 
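 *
 * For example, the #DE handler in traps.c is essentially just:
 *
 *	DEFINE_IDTENTRY(exc_divide_error)
 *	{
 *		do_error_trap(regs, 0, "divide error", X86_TRAP_DE,
 *			      SIGFPE, FPE_INTDIV, error_get_trap_addr(regs));
 *	}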
+ */ +#define DEFINE_IDTENTRY(func) \ +static __always_inline void __##func(struct pt_regs *regs); \ + \ +__visible noinstr void func(struct pt_regs *regs) \ +{ \ + irqentry_state_t state = irqentry_enter(regs); \ + \ + instrumentation_begin(); \ + __##func (regs); \ + instrumentation_end(); \ + irqentry_exit(regs, state); \ +} \ + \ +static __always_inline void __##func(struct pt_regs *regs) + +/* Special case for 32bit IRET 'trap' */ +#define DECLARE_IDTENTRY_SW DECLARE_IDTENTRY +#define DEFINE_IDTENTRY_SW DEFINE_IDTENTRY + +/** + * DECLARE_IDTENTRY_ERRORCODE - Declare functions for simple IDT entry points + * Error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares three functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the ASM entry point + * + * Same as DECLARE_IDTENTRY, but has an extra error_code argument for the + * C-handler. + */ +#define DECLARE_IDTENTRY_ERRORCODE(vector, func) \ + asmlinkage void asm_##func(void); \ + asmlinkage void xen_asm_##func(void); \ + __visible void func(struct pt_regs *regs, unsigned long error_code) + +/** + * DEFINE_IDTENTRY_ERRORCODE - Emit code for simple IDT entry points + * Error code pushed by hardware + * @func: Function name of the entry point + * + * Same as DEFINE_IDTENTRY, but has an extra error_code argument + */ +#define DEFINE_IDTENTRY_ERRORCODE(func) \ +static __always_inline void __##func(struct pt_regs *regs, \ + unsigned long error_code); \ + \ +__visible noinstr void func(struct pt_regs *regs, \ + unsigned long error_code) \ +{ \ + irqentry_state_t state = irqentry_enter(regs); \ + \ + instrumentation_begin(); \ + __##func (regs, error_code); \ + instrumentation_end(); \ + irqentry_exit(regs, state); \ +} \ + \ +static __always_inline void __##func(struct pt_regs *regs, \ + unsigned long error_code) + +/** + * DECLARE_IDTENTRY_RAW - Declare functions for raw IDT entry points + * No error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY(). + */ +#define DECLARE_IDTENTRY_RAW(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +/** + * DEFINE_IDTENTRY_RAW - Emit code for raw IDT entry points + * @func: Function name of the entry point + * + * @func is called from ASM entry code with interrupts disabled. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + * + * Contrary to DEFINE_IDTENTRY() this does not invoke the + * idtentry_enter/exit() helpers before and after the body invocation. This + * needs to be done in the body itself if applicable. Use if extra work + * is required before the enter/exit() helpers are invoked. + */ +#define DEFINE_IDTENTRY_RAW(func) \ +__visible noinstr void func(struct pt_regs *regs) + +/** + * DEFINE_FREDENTRY_RAW - Emit code for raw FRED entry points + * @func: Function name of the entry point + * + * @func is called from the FRED event dispatcher with interrupts disabled. + * + * See @DEFINE_IDTENTRY_RAW for further details. 
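 *
 * A representative DEFINE_IDTENTRY_RAW() user is #BP: poke_int3_handler()
 * must run before any instrumentable code, so the handler starts out
 * (abridged; the irqentry bookkeeping happens manually in the omitted
 * tail):
 *
 *	DEFINE_IDTENTRY_RAW(exc_int3)
 *	{
 *		if (poke_int3_handler(regs))
 *			return;
 *		...
 *	}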
+ */ +#define DEFINE_FREDENTRY_RAW(func) \ +noinstr void fred_##func(struct pt_regs *regs) + +/** + * DECLARE_IDTENTRY_RAW_ERRORCODE - Declare functions for raw IDT entry points + * Error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_ERRORCODE() + */ +#define DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) \ + DECLARE_IDTENTRY_ERRORCODE(vector, func) + +/** + * DEFINE_IDTENTRY_RAW_ERRORCODE - Emit code for raw IDT entry points + * @func: Function name of the entry point + * + * @func is called from ASM entry code with interrupts disabled. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + * + * Contrary to DEFINE_IDTENTRY_ERRORCODE() this does not invoke the + * irqentry_enter/exit() helpers before and after the body invocation. This + * needs to be done in the body itself if applicable. Use if extra work + * is required before the enter/exit() helpers are invoked. + */ +#define DEFINE_IDTENTRY_RAW_ERRORCODE(func) \ +__visible noinstr void func(struct pt_regs *regs, unsigned long error_code) + +/** + * DECLARE_IDTENTRY_IRQ - Declare functions for device interrupt IDT entry + * points (common/spurious) + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_ERRORCODE() + */ +#define DECLARE_IDTENTRY_IRQ(vector, func) \ + DECLARE_IDTENTRY_ERRORCODE(vector, func) + +/** + * DEFINE_IDTENTRY_IRQ - Emit code for device interrupt IDT entry points + * @func: Function name of the entry point + * + * The vector number is pushed by the low level entry stub and handed + * to the function as error_code argument which needs to be truncated + * to an u8 because the push is sign extending. + * + * irq_enter/exit_rcu() are invoked before the function body and the + * KVM L1D flush request is set. Stack switching to the interrupt stack + * has to be done in the function body if necessary. + */ +#define DEFINE_IDTENTRY_IRQ(func) \ +static void __##func(struct pt_regs *regs, u32 vector); \ + \ +__visible noinstr void func(struct pt_regs *regs, \ + unsigned long error_code) \ +{ \ + irqentry_state_t state = irqentry_enter(regs); \ + u32 vector = (u32)(u8)error_code; \ + \ + kvm_set_cpu_l1tf_flush_l1d(); \ + instrumentation_begin(); \ + run_irq_on_irqstack_cond(__##func, regs, vector); \ + instrumentation_end(); \ + irqentry_exit(regs, state); \ +} \ + \ +static noinline void __##func(struct pt_regs *regs, u32 vector) + +/** + * DECLARE_IDTENTRY_SYSVEC - Declare functions for system vector entry points + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares three functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the ASM entry point + * + * Maps to DECLARE_IDTENTRY(). + */ +#define DECLARE_IDTENTRY_SYSVEC(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +/** + * DEFINE_IDTENTRY_SYSVEC - Emit code for system vector IDT entry points + * @func: Function name of the entry point + * + * irqentry_enter/exit() and irq_enter/exit_rcu() are invoked before the + * function body. KVM L1D flush request is set. 
+ * + * Runs the function on the interrupt stack if the entry hit kernel mode + */ +#define DEFINE_IDTENTRY_SYSVEC(func) \ +static void __##func(struct pt_regs *regs); \ + \ +static __always_inline void instr_##func(struct pt_regs *regs) \ +{ \ + run_sysvec_on_irqstack_cond(__##func, regs); \ +} \ + \ +__visible noinstr void func(struct pt_regs *regs) \ +{ \ + irqentry_state_t state = irqentry_enter(regs); \ + \ + kvm_set_cpu_l1tf_flush_l1d(); \ + instrumentation_begin(); \ + instr_##func (regs); \ + instrumentation_end(); \ + irqentry_exit(regs, state); \ +} \ + \ +void fred_##func(struct pt_regs *regs) \ +{ \ + instr_##func (regs); \ +} \ + \ +static noinline void __##func(struct pt_regs *regs) + +/** + * DEFINE_IDTENTRY_SYSVEC_SIMPLE - Emit code for simple system vector IDT + * entry points + * @func: Function name of the entry point + * + * Runs the function on the interrupted stack. No switch to IRQ stack and + * only the minimal __irq_enter/exit() handling. + * + * Only use for 'empty' vectors like reschedule IPI and KVM posted + * interrupt vectors. + */ +#define DEFINE_IDTENTRY_SYSVEC_SIMPLE(func) \ +static __always_inline void __##func(struct pt_regs *regs); \ + \ +static __always_inline void instr_##func(struct pt_regs *regs) \ +{ \ + __irq_enter_raw(); \ + __##func (regs); \ + __irq_exit_raw(); \ +} \ + \ +__visible noinstr void func(struct pt_regs *regs) \ +{ \ + irqentry_state_t state = irqentry_enter(regs); \ + \ + kvm_set_cpu_l1tf_flush_l1d(); \ + instrumentation_begin(); \ + instr_##func (regs); \ + instrumentation_end(); \ + irqentry_exit(regs, state); \ +} \ + \ +void fred_##func(struct pt_regs *regs) \ +{ \ + instr_##func (regs); \ +} \ + \ +static __always_inline void __##func(struct pt_regs *regs) + +/** + * DECLARE_IDTENTRY_XENCB - Declare functions for XEN HV callback entry point + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares three functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the ASM entry point + * + * Maps to DECLARE_IDTENTRY(). Distinct entry point to handle the 32/64-bit + * difference + */ +#define DECLARE_IDTENTRY_XENCB(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +#ifdef CONFIG_X86_64 +/** + * DECLARE_IDTENTRY_IST - Declare functions for IST handling IDT entry points + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_RAW, but declares also the NOIST C handler + * which is called from the ASM entry point on user mode entry + */ +#define DECLARE_IDTENTRY_IST(vector, func) \ + DECLARE_IDTENTRY_RAW(vector, func); \ + __visible void noist_##func(struct pt_regs *regs) + +/** + * DECLARE_IDTENTRY_VC - Declare functions for the VC entry point + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_RAW_ERRORCODE, but declares also the + * safe_stack C handler. 
+ */ +#define DECLARE_IDTENTRY_VC(vector, func) \ + DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func); \ + __visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code); \ + __visible noinstr void user_##func(struct pt_regs *regs, unsigned long error_code) + +/** + * DEFINE_IDTENTRY_IST - Emit code for IST entry points + * @func: Function name of the entry point + * + * Maps to DEFINE_IDTENTRY_RAW + */ +#define DEFINE_IDTENTRY_IST(func) \ + DEFINE_IDTENTRY_RAW(func) + +/** + * DEFINE_IDTENTRY_NOIST - Emit code for NOIST entry points which + * belong to a IST entry point (MCE, DB) + * @func: Function name of the entry point. Must be the same as + * the function name of the corresponding IST variant + * + * Maps to DEFINE_IDTENTRY_RAW(). + */ +#define DEFINE_IDTENTRY_NOIST(func) \ + DEFINE_IDTENTRY_RAW(noist_##func) + +/** + * DECLARE_IDTENTRY_DF - Declare functions for double fault + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_RAW_ERRORCODE + */ +#define DECLARE_IDTENTRY_DF(vector, func) \ + DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) + +/** + * DEFINE_IDTENTRY_DF - Emit code for double fault + * @func: Function name of the entry point + * + * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE + */ +#define DEFINE_IDTENTRY_DF(func) \ + DEFINE_IDTENTRY_RAW_ERRORCODE(func) + +/** + * DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler + * when raised from kernel mode + * @func: Function name of the entry point + * + * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE + */ +#define DEFINE_IDTENTRY_VC_KERNEL(func) \ + DEFINE_IDTENTRY_RAW_ERRORCODE(kernel_##func) + +/** + * DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler + * when raised from user mode + * @func: Function name of the entry point + * + * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE + */ +#define DEFINE_IDTENTRY_VC_USER(func) \ + DEFINE_IDTENTRY_RAW_ERRORCODE(user_##func) + +#else /* CONFIG_X86_64 */ + +/** + * DECLARE_IDTENTRY_DF - Declare functions for double fault 32bit variant + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares two functions: + * - The ASM entry point: asm_##func + * - The C handler called from the C shim + */ +#define DECLARE_IDTENTRY_DF(vector, func) \ + asmlinkage void asm_##func(void); \ + __visible void func(struct pt_regs *regs, \ + unsigned long error_code, \ + unsigned long address) + +/** + * DEFINE_IDTENTRY_DF - Emit code for double fault on 32bit + * @func: Function name of the entry point + * + * This is called through the doublefault shim which already provides + * cr2 in the address argument. 
+ */
+#define DEFINE_IDTENTRY_DF(func) \
+__visible noinstr void func(struct pt_regs *regs, \
+ unsigned long error_code, \
+ unsigned long address)
+
+#endif /* !CONFIG_X86_64 */
+
+/* C-Code mapping */
+#define DECLARE_IDTENTRY_NMI DECLARE_IDTENTRY_RAW
+#define DEFINE_IDTENTRY_NMI DEFINE_IDTENTRY_RAW
+#define DEFINE_FREDENTRY_NMI DEFINE_FREDENTRY_RAW
+
+#ifdef CONFIG_X86_64
+#define DECLARE_IDTENTRY_MCE DECLARE_IDTENTRY_IST
+#define DEFINE_IDTENTRY_MCE DEFINE_IDTENTRY_IST
+#define DEFINE_IDTENTRY_MCE_USER DEFINE_IDTENTRY_NOIST
+#define DEFINE_FREDENTRY_MCE DEFINE_FREDENTRY_RAW
+
+#define DECLARE_IDTENTRY_DEBUG DECLARE_IDTENTRY_IST
+#define DEFINE_IDTENTRY_DEBUG DEFINE_IDTENTRY_IST
+#define DEFINE_IDTENTRY_DEBUG_USER DEFINE_IDTENTRY_NOIST
+#define DEFINE_FREDENTRY_DEBUG DEFINE_FREDENTRY_RAW
+#endif
+
+void idt_install_sysvec(unsigned int n, const void *function);
+void fred_install_sysvec(unsigned int vector, const idtentry_t function);
+
+#define sysvec_install(vector, function) { \
+ if (IS_ENABLED(CONFIG_X86_FRED)) \
+ fred_install_sysvec(vector, function); \
+ if (!cpu_feature_enabled(X86_FEATURE_FRED)) \
+ idt_install_sysvec(vector, asm_##function); \
+}
+
+#else /* !__ASSEMBLER__ */
+
+/*
+ * The ASM variants for DECLARE_IDTENTRY*() which emit the ASM entry stubs.
+ */
+#define DECLARE_IDTENTRY(vector, func) \
+ idtentry vector asm_##func func has_error_code=0
+
+#define DECLARE_IDTENTRY_ERRORCODE(vector, func) \
+ idtentry vector asm_##func func has_error_code=1
+
+/* Special case for 32bit IRET 'trap'. Do not emit ASM code */
+#define DECLARE_IDTENTRY_SW(vector, func)
+
+#define DECLARE_IDTENTRY_RAW(vector, func) \
+ DECLARE_IDTENTRY(vector, func)
+
+#define DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) \
+ DECLARE_IDTENTRY_ERRORCODE(vector, func)
+
+/* Entries for common/spurious (device) interrupts */
+#define DECLARE_IDTENTRY_IRQ(vector, func) \
+ idtentry_irq vector func
+
+/* System vector entries */
+#define DECLARE_IDTENTRY_SYSVEC(vector, func) \
+ DECLARE_IDTENTRY(vector, func)
+
+#ifdef CONFIG_X86_64
+# define DECLARE_IDTENTRY_MCE(vector, func) \
+ idtentry_mce_db vector asm_##func func
+
+# define DECLARE_IDTENTRY_DEBUG(vector, func) \
+ idtentry_mce_db vector asm_##func func
+
+# define DECLARE_IDTENTRY_DF(vector, func) \
+ idtentry_df vector asm_##func func
+
+# define DECLARE_IDTENTRY_XENCB(vector, func) \
+ DECLARE_IDTENTRY(vector, func)
+
+# define DECLARE_IDTENTRY_VC(vector, func) \
+ idtentry_vc vector asm_##func func
+
+#else
+# define DECLARE_IDTENTRY_MCE(vector, func) \
+ DECLARE_IDTENTRY(vector, func)
+
+/* No ASM emitted for DF as this goes through a C shim */
+# define DECLARE_IDTENTRY_DF(vector, func)
+
+/* No ASM emitted for XEN hypervisor callback */
+# define DECLARE_IDTENTRY_XENCB(vector, func)
+
+#endif
+
+/* No ASM code emitted for NMI */
+#define DECLARE_IDTENTRY_NMI(vector, func)
+
+/*
+ * ASM code to emit the common vector entry stubs where each stub is
+ * packed into IDT_ALIGN bytes.
+ *
+ * Note, that the 'pushq imm8' is emitted via '.byte 0x6a, vector' because
+ * GCC treats the local vector variable as unsigned int and would expand
+ * all vectors above 0x7F to a 5 byte push. The original code did an
+ * adjustment of the vector number to be in the signed byte range to avoid
+ * this. While clever, it's mindbogglingly counterintuitive and requires the
+ * odd conversion back to a real vector number in the C entry points.
Using + * .byte achieves the same thing and the only fixup needed in the C entry + * point is to mask off the bits above bit 7 because the push is sign + * extending. + */ + .align IDT_ALIGN +SYM_CODE_START(irq_entries_start) + vector=FIRST_EXTERNAL_VECTOR + .rept NR_EXTERNAL_VECTORS + UNWIND_HINT_IRET_REGS +0 : + ENDBR + .byte 0x6a, vector + jmp asm_common_interrupt + /* Ensure that the above is IDT_ALIGN bytes max */ + .fill 0b + IDT_ALIGN - ., 1, 0xcc + vector = vector+1 + .endr +SYM_CODE_END(irq_entries_start) + +#ifdef CONFIG_X86_LOCAL_APIC + .align IDT_ALIGN +SYM_CODE_START(spurious_entries_start) + vector=FIRST_SYSTEM_VECTOR + .rept NR_SYSTEM_VECTORS + UNWIND_HINT_IRET_REGS +0 : + ENDBR + .byte 0x6a, vector + jmp asm_spurious_interrupt + /* Ensure that the above is IDT_ALIGN bytes max */ + .fill 0b + IDT_ALIGN - ., 1, 0xcc + vector = vector+1 + .endr +SYM_CODE_END(spurious_entries_start) +#endif + +#endif /* __ASSEMBLER__ */ + +/* + * The actual entry points. Note that DECLARE_IDTENTRY*() serves two + * purposes: + * - provide the function declarations when included from C-Code + * - emit the ASM stubs when included from entry_32/64.S + * + * This avoids duplicate defines and ensures that everything is consistent. + */ + +/* + * Dummy trap number so the low level ASM macro vector number checks do not + * match which results in emitting plain IDTENTRY stubs without bells and + * whistles. + */ +#define X86_TRAP_OTHER 0xFFFF + +/* Simple exception entry points. No hardware error code */ +DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error); +DECLARE_IDTENTRY(X86_TRAP_OF, exc_overflow); +DECLARE_IDTENTRY(X86_TRAP_BR, exc_bounds); +DECLARE_IDTENTRY(X86_TRAP_NM, exc_device_not_available); +DECLARE_IDTENTRY(X86_TRAP_OLD_MF, exc_coproc_segment_overrun); +DECLARE_IDTENTRY(X86_TRAP_SPURIOUS, exc_spurious_interrupt_bug); +DECLARE_IDTENTRY(X86_TRAP_MF, exc_coprocessor_error); +DECLARE_IDTENTRY(X86_TRAP_XF, exc_simd_coprocessor_error); + +/* 32bit software IRET trap. Do not emit ASM code */ +DECLARE_IDTENTRY_SW(X86_TRAP_IRET, iret_error); + +/* Simple exception entries with error code pushed by hardware */ +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_TS, exc_invalid_tss); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_NP, exc_segment_not_present); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_SS, exc_stack_segment); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP, exc_general_protection); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC, exc_alignment_check); + +/* Raw exception entries which need extra work */ +DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op); +DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); +DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault); + +#if defined(CONFIG_IA32_EMULATION) +DECLARE_IDTENTRY_RAW(IA32_SYSCALL_VECTOR, int80_emulation); +#endif + +#ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_64 +DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); +#else +DECLARE_IDTENTRY_RAW(X86_TRAP_MC, exc_machine_check); +#endif +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_RAW(X86_TRAP_MC, xenpv_exc_machine_check); +#endif +#endif + +/* NMI */ + +#if IS_ENABLED(CONFIG_KVM_INTEL) +/* + * Special entry point for VMX which invokes this on the kernel stack, even for + * 64-bit, i.e. without using an IST. asm_exc_nmi() requires an IST to work + * correctly vs. the NMI 'executing' marker. Used for 32-bit kernels as well + * to avoid more ifdeffery. 
+ */ +DECLARE_IDTENTRY(X86_TRAP_NMI, exc_nmi_kvm_vmx); +#endif + +DECLARE_IDTENTRY_NMI(X86_TRAP_NMI, exc_nmi); +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_RAW(X86_TRAP_NMI, xenpv_exc_nmi); +#endif + +/* #DB */ +#ifdef CONFIG_X86_64 +DECLARE_IDTENTRY_DEBUG(X86_TRAP_DB, exc_debug); +#else +DECLARE_IDTENTRY_RAW(X86_TRAP_DB, exc_debug); +#endif +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_RAW(X86_TRAP_DB, xenpv_exc_debug); +#endif + +/* #DF */ +DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault); +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_DF, xenpv_exc_double_fault); +#endif + +/* #CP */ +#ifdef CONFIG_X86_CET +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_CP, exc_control_protection); +#endif + +/* #VC */ +#ifdef CONFIG_AMD_MEM_ENCRYPT +DECLARE_IDTENTRY_VC(X86_TRAP_VC, exc_vmm_communication); +#endif + +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_XENCB(X86_TRAP_OTHER, exc_xen_hypervisor_callback); +DECLARE_IDTENTRY_RAW(X86_TRAP_OTHER, exc_xen_unknown_trap); +#endif + +#ifdef CONFIG_INTEL_TDX_GUEST +DECLARE_IDTENTRY(X86_TRAP_VE, exc_virtualization_exception); +#endif + +/* Device interrupts common/spurious */ +DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, common_interrupt); +#ifdef CONFIG_X86_LOCAL_APIC +DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, spurious_interrupt); +#endif + +/* System vector entry points */ +#ifdef CONFIG_X86_LOCAL_APIC +DECLARE_IDTENTRY_SYSVEC(ERROR_APIC_VECTOR, sysvec_error_interrupt); +DECLARE_IDTENTRY_SYSVEC(SPURIOUS_APIC_VECTOR, sysvec_spurious_apic_interrupt); +DECLARE_IDTENTRY_SYSVEC(LOCAL_TIMER_VECTOR, sysvec_apic_timer_interrupt); +DECLARE_IDTENTRY_SYSVEC(X86_PLATFORM_IPI_VECTOR, sysvec_x86_platform_ipi); +#endif + +#ifdef CONFIG_SMP +DECLARE_IDTENTRY(RESCHEDULE_VECTOR, sysvec_reschedule_ipi); +DECLARE_IDTENTRY_SYSVEC(REBOOT_VECTOR, sysvec_reboot); +DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_SINGLE_VECTOR, sysvec_call_function_single); +DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_VECTOR, sysvec_call_function); +#else +# define fred_sysvec_reschedule_ipi NULL +# define fred_sysvec_reboot NULL +# define fred_sysvec_call_function_single NULL +# define fred_sysvec_call_function NULL +#endif + +#ifdef CONFIG_X86_LOCAL_APIC +# ifdef CONFIG_X86_MCE_THRESHOLD +DECLARE_IDTENTRY_SYSVEC(THRESHOLD_APIC_VECTOR, sysvec_threshold); +# else +# define fred_sysvec_threshold NULL +# endif + +# ifdef CONFIG_X86_MCE_AMD +DECLARE_IDTENTRY_SYSVEC(DEFERRED_ERROR_VECTOR, sysvec_deferred_error); +# else +# define fred_sysvec_deferred_error NULL +# endif + +# ifdef CONFIG_X86_THERMAL_VECTOR +DECLARE_IDTENTRY_SYSVEC(THERMAL_APIC_VECTOR, sysvec_thermal); +# else +# define fred_sysvec_thermal NULL +# endif + +# ifdef CONFIG_IRQ_WORK +DECLARE_IDTENTRY_SYSVEC(IRQ_WORK_VECTOR, sysvec_irq_work); +# else +# define fred_sysvec_irq_work NULL +# endif +#endif + +#if IS_ENABLED(CONFIG_KVM) +DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_VECTOR, sysvec_kvm_posted_intr_ipi); +DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_WAKEUP_VECTOR, sysvec_kvm_posted_intr_wakeup_ipi); +DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested_ipi); +#else +# define fred_sysvec_kvm_posted_intr_ipi NULL +# define fred_sysvec_kvm_posted_intr_wakeup_ipi NULL +# define fred_sysvec_kvm_posted_intr_nested_ipi NULL +#endif + +# ifdef CONFIG_X86_POSTED_MSI +DECLARE_IDTENTRY_SYSVEC(POSTED_MSI_NOTIFICATION_VECTOR, sysvec_posted_msi_notification); +#else +# define fred_sysvec_posted_msi_notification NULL +# endif + +#if IS_ENABLED(CONFIG_HYPERV) +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback); 
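Each DECLARE_IDTENTRY_SYSVEC() above is paired with a DEFINE_IDTENTRY_SYSVEC() at the implementation site. For a concrete shape, the local APIC timer handler in apic.c reads roughly as follows (the EOI helper is ack_APIC_irq() or apic_eoi() depending on kernel version):

	DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt)
	{
		struct pt_regs *old_regs = set_irq_regs(regs);

		ack_APIC_irq();
		local_apic_timer_interrupt();
		set_irq_regs(old_regs);
	}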
+DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment); +DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0); +#endif + +#if IS_ENABLED(CONFIG_ACRN_GUEST) +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback); +#endif + +#ifdef CONFIG_XEN_PVHVM +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_xen_hvm_callback); +#endif + +#ifdef CONFIG_KVM_GUEST +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_kvm_asyncpf_interrupt); +#endif + +#undef X86_TRAP_OTHER + +#endif diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h index ebea2c9d2cdc..0d1dbf235679 100644 --- a/arch/x86/include/asm/imr.h +++ b/arch/x86/include/asm/imr.h @@ -1,13 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * imr.h: Isolated Memory Region API * * Copyright(c) 2013 Intel Corporation. * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. */ #ifndef _IMR_H #define _IMR_H diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h index 1c78580e58be..1b3060a3425c 100644 --- a/arch/x86/include/asm/inat.h +++ b/arch/x86/include/asm/inat.h @@ -1,26 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_INAT_H #define _ASM_X86_INAT_H /* * x86 instruction attributes * * Written by Masami Hiramatsu <mhiramat@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * */ -#include <asm/inat_types.h> +#include <asm/inat_types.h> /* __ignore_sync_check__ */ /* * Internal bits. 
Don't use bitmasks directly, because these bits are @@ -49,6 +35,10 @@ #define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ #define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ #define INAT_PFX_EVEX 15 /* EVEX prefix */ +/* x86-64 REX2 prefix */ +#define INAT_PFX_REX2 16 /* 0xD5 */ +/* AMD XOP prefix */ +#define INAT_PFX_XOP 17 /* 0x8F */ #define INAT_LSTPFX_MAX 3 #define INAT_LGCPFX_MAX 11 @@ -64,7 +54,7 @@ /* Legacy prefix */ #define INAT_PFX_OFFS 0 -#define INAT_PFX_BITS 4 +#define INAT_PFX_BITS 5 #define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) #define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) /* Escape opcodes */ @@ -89,8 +79,13 @@ #define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) #define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) #define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) +#define INAT_XOPOK INAT_VEXOK #define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) #define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7)) +#define INAT_NO_REX2 (1 << (INAT_FLAG_OFFS + 8)) +#define INAT_REX2_VARIANT (1 << (INAT_FLAG_OFFS + 9)) +#define INAT_EVEX_SCALABLE (1 << (INAT_FLAG_OFFS + 10)) +#define INAT_INV64 (1 << (INAT_FLAG_OFFS + 11)) /* Attribute making macros for attribute tables */ #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) @@ -119,6 +114,8 @@ extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, insn_byte_t vex_pp); +extern insn_attr_t inat_get_xop_attribute(insn_byte_t opcode, + insn_byte_t map_select); /* Attribute checking functions */ static inline int inat_is_legacy_prefix(insn_attr_t attr) @@ -142,6 +139,11 @@ static inline int inat_is_rex_prefix(insn_attr_t attr) return (attr & INAT_PFX_MASK) == INAT_PFX_REX; } +static inline int inat_is_rex2_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_REX2; +} + static inline int inat_last_prefix_id(insn_attr_t attr) { if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) @@ -167,6 +169,11 @@ static inline int inat_is_vex3_prefix(insn_attr_t attr) return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; } +static inline int inat_is_xop_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_XOP; +} + static inline int inat_is_escape(insn_attr_t attr) { return attr & INAT_ESC_MASK; @@ -232,6 +239,11 @@ static inline int inat_accept_vex(insn_attr_t attr) return attr & INAT_VEXOK; } +static inline int inat_accept_xop(insn_attr_t attr) +{ + return attr & INAT_XOPOK; +} + static inline int inat_must_vex(insn_attr_t attr) { return attr & (INAT_VEXONLY | INAT_EVEXONLY); @@ -241,4 +253,14 @@ static inline int inat_must_evex(insn_attr_t attr) { return attr & INAT_EVEXONLY; } + +static inline int inat_evex_scalable(insn_attr_t attr) +{ + return attr & INAT_EVEX_SCALABLE; +} + +static inline int inat_is_invalid64(insn_attr_t attr) +{ + return attr & INAT_INV64; +} #endif diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h index cb3c20ce39cf..b047efa9ddc2 100644 --- a/arch/x86/include/asm/inat_types.h +++ b/arch/x86/include/asm/inat_types.h @@ -1,24 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_INAT_TYPES_H #define _ASM_X86_INAT_TYPES_H /* * x86 instruction attributes * * Written by Masami Hiramatsu <mhiramat@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at 
your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * */ /* Instruction attributes */ diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index 5f1d3c421f68..01ccdd168df0 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h @@ -4,6 +4,7 @@ struct x86_mapping_info { void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ + void (*free_pgt_page)(void *, void *); /* free buf for page table */ void *context; /* context for alloc_pgt_page */ unsigned long page_flag; /* page flag for PMD or PUD entry */ unsigned long offset; /* ident mapping offset */ @@ -14,4 +15,6 @@ struct x86_mapping_info { int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, unsigned long pstart, unsigned long pend); +void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd); + #endif /* _ASM_X86_INIT_H */ diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h index 2b6ccf2c49f1..4733e9064ee5 100644 --- a/arch/x86/include/asm/insn-eval.h +++ b/arch/x86/include/asm/insn-eval.h @@ -15,9 +15,35 @@ #define INSN_CODE_SEG_OPND_SZ(params) (params & 0xf) #define INSN_CODE_SEG_PARAMS(oper_sz, addr_sz) (oper_sz | (addr_sz << 4)) +int pt_regs_offset(struct pt_regs *regs, int regno); + +bool insn_has_rep_prefix(struct insn *insn); void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs); int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs); +int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs); +unsigned long *insn_get_modrm_reg_ptr(struct insn *insn, struct pt_regs *regs); unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx); int insn_get_code_seg_params(struct pt_regs *regs); +int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip); +int insn_fetch_from_user(struct pt_regs *regs, + unsigned char buf[MAX_INSN_SIZE]); +int insn_fetch_from_user_inatomic(struct pt_regs *regs, + unsigned char buf[MAX_INSN_SIZE]); +bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs, + unsigned char buf[MAX_INSN_SIZE], int buf_size); + +enum insn_mmio_type { + INSN_MMIO_DECODE_FAILED, + INSN_MMIO_WRITE, + INSN_MMIO_WRITE_IMM, + INSN_MMIO_READ, + INSN_MMIO_READ_ZERO_EXTEND, + INSN_MMIO_READ_SIGN_EXTEND, + INSN_MMIO_MOVS, +}; + +enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes); + +bool insn_is_nop(struct insn *insn); #endif /* _ASM_X86_INSN_EVAL_H */ diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index c2c01f84df75..846d21c1a7f8 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -1,27 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_INSN_H #define _ASM_X86_INSN_H /* * x86 instruction analysis * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
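Returning to the insn-eval declarations above: insn_decode_mmio() and insn_get_modrm_reg_ptr() are the building blocks of the TDX/SEV-style MMIO emulation paths. A minimal sketch of the plain-store case (the calling convention and the memcpy_toio() I/O helper are assumptions of this example, not part of the header):

	static int emulate_mmio_write(struct insn *insn, struct pt_regs *regs,
				      void __iomem *target)
	{
		unsigned long *reg;
		int bytes = 0;

		if (insn_decode_mmio(insn, &bytes) != INSN_MMIO_WRITE)
			return -EINVAL;		/* only plain register stores here */

		reg = insn_get_modrm_reg_ptr(insn, regs);
		if (!reg)
			return -EINVAL;

		memcpy_toio(target, reg, bytes);	/* low 'bytes' bytes of *reg */
		return 0;
	}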
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright (C) IBM Corporation, 2009 */ +#include <asm/byteorder.h> /* insn_attr_t is defined in inat.h */ -#include <asm/inat.h> +#include <asm/inat.h> /* __ignore_sync_check__ */ + +#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) struct insn_field { union { @@ -33,13 +23,58 @@ struct insn_field { unsigned char nbytes; }; +static inline void insn_field_set(struct insn_field *p, insn_value_t v, + unsigned char n) +{ + p->value = v; + p->nbytes = n; +} + +static inline void insn_set_byte(struct insn_field *p, unsigned char n, + insn_byte_t v) +{ + p->bytes[n] = v; +} + +#else + +struct insn_field { + insn_value_t value; + union { + insn_value_t little; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +static inline void insn_field_set(struct insn_field *p, insn_value_t v, + unsigned char n) +{ + p->value = v; + p->little = __cpu_to_le32(v); + p->nbytes = n; +} + +static inline void insn_set_byte(struct insn_field *p, unsigned char n, + insn_byte_t v) +{ + p->bytes[n] = v; + p->value = __le32_to_cpu(p->little); +} +#endif + struct insn { struct insn_field prefixes; /* * Prefixes * prefixes.bytes[3]: last prefix */ struct insn_field rex_prefix; /* REX prefix */ - struct insn_field vex_prefix; /* VEX prefix */ + union { + struct insn_field vex_prefix; /* VEX prefix */ + struct insn_field xop_prefix; /* XOP prefix */ + }; struct insn_field opcode; /* * opcode.bytes[0]: opcode1 * opcode.bytes[1]: opcode2 @@ -58,6 +93,7 @@ struct insn { struct insn_field immediate2; /* for 64bit imm or seg16 */ }; + int emulate_prefix_size; insn_attr_t attr; unsigned char opnd_bytes; unsigned char addr_bytes; @@ -79,10 +115,15 @@ struct insn { #define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) #define X86_SIB_BASE(sib) ((sib) & 0x07) -#define X86_REX_W(rex) ((rex) & 8) -#define X86_REX_R(rex) ((rex) & 4) -#define X86_REX_X(rex) ((rex) & 2) -#define X86_REX_B(rex) ((rex) & 1) +#define X86_REX2_M(rex) ((rex) & 0x80) /* REX2 M0 */ +#define X86_REX2_R(rex) ((rex) & 0x40) /* REX2 R4 */ +#define X86_REX2_X(rex) ((rex) & 0x20) /* REX2 X4 */ +#define X86_REX2_B(rex) ((rex) & 0x10) /* REX2 B4 */ + +#define X86_REX_W(rex) ((rex) & 8) /* REX or REX2 W */ +#define X86_REX_R(rex) ((rex) & 4) /* REX or REX2 R3 */ +#define X86_REX_X(rex) ((rex) & 2) /* REX or REX2 X3 */ +#define X86_REX_B(rex) ((rex) & 1) /* REX or REX2 B3 */ /* VEX bit flags */ #define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ @@ -91,21 +132,44 @@ struct insn { #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ /* VEX bit fields */ -#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */ +#define X86_EVEX_M(vex) ((vex) & 0x07) /* EVEX Byte1 */ #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ #define X86_VEX2_M 1 /* VEX2.M always 1 */ #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ #define X86_VEX_M_MAX 
0x1f /* VEX3.M Maximum value */ +/* XOP bit fields */ +#define X86_XOP_R(xop) ((xop) & 0x80) /* XOP Byte2 */ +#define X86_XOP_X(xop) ((xop) & 0x40) /* XOP Byte2 */ +#define X86_XOP_B(xop) ((xop) & 0x20) /* XOP Byte2 */ +#define X86_XOP_M(xop) ((xop) & 0x1f) /* XOP Byte2 */ +#define X86_XOP_W(xop) ((xop) & 0x80) /* XOP Byte3 */ +#define X86_XOP_V(xop) ((xop) & 0x78) /* XOP Byte3 */ +#define X86_XOP_L(xop) ((xop) & 0x04) /* XOP Byte3 */ +#define X86_XOP_P(xop) ((xop) & 0x03) /* XOP Byte3 */ +#define X86_XOP_M_MIN 0x08 /* Min of XOP.M */ +#define X86_XOP_M_MAX 0x1f /* Max of XOP.M */ extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); -extern void insn_get_prefixes(struct insn *insn); -extern void insn_get_opcode(struct insn *insn); -extern void insn_get_modrm(struct insn *insn); -extern void insn_get_sib(struct insn *insn); -extern void insn_get_displacement(struct insn *insn); -extern void insn_get_immediate(struct insn *insn); -extern void insn_get_length(struct insn *insn); +extern int insn_get_prefixes(struct insn *insn); +extern int insn_get_opcode(struct insn *insn); +extern int insn_get_modrm(struct insn *insn); +extern int insn_get_sib(struct insn *insn); +extern int insn_get_displacement(struct insn *insn); +extern int insn_get_immediate(struct insn *insn); +extern int insn_get_length(struct insn *insn); + +enum insn_mode { + INSN_MODE_32, + INSN_MODE_64, + /* Mode is determined by the current kernel build. */ + INSN_MODE_KERN, + INSN_NUM_MODES, +}; + +extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m); + +#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN) /* Attribute will be determined after getting ModRM (for opcode groups) */ static inline void insn_get_attribute(struct insn *insn) @@ -116,18 +180,19 @@ static inline void insn_get_attribute(struct insn *insn) /* Instruction uses RIP-relative addressing */ extern int insn_rip_relative(struct insn *insn); -/* Init insn for kernel text */ -static inline void kernel_insn_init(struct insn *insn, - const void *kaddr, int buf_len) +static inline int insn_is_rex2(struct insn *insn) { -#ifdef CONFIG_X86_64 - insn_init(insn, kaddr, buf_len, 1); -#else /* CONFIG_X86_32 */ - insn_init(insn, kaddr, buf_len, 0); -#endif + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return insn->rex_prefix.nbytes == 2; } -static inline int insn_is_avx(struct insn *insn) +static inline insn_byte_t insn_rex2_m_bit(struct insn *insn) +{ + return X86_REX2_M(insn->rex_prefix.bytes[1]); +} + +static inline int insn_is_avx_or_xop(struct insn *insn) { if (!insn->prefixes.got) insn_get_prefixes(insn); @@ -141,11 +206,25 @@ static inline int insn_is_evex(struct insn *insn) return (insn->vex_prefix.nbytes == 4); } -/* Ensure this instruction is decoded completely */ -static inline int insn_complete(struct insn *insn) +/* If we already know this is AVX/XOP encoded */ +static inline int avx_insn_is_xop(struct insn *insn) +{ + insn_attr_t attr = inat_get_opcode_attribute(insn->vex_prefix.bytes[0]); + + return inat_is_xop_prefix(attr); +} + +static inline int insn_is_xop(struct insn *insn) +{ + if (!insn_is_avx_or_xop(insn)) + return 0; + + return avx_insn_is_xop(insn); +} + +static inline int insn_has_emulate_prefix(struct insn *insn) { - return insn->opcode.got && insn->modrm.got && insn->sib.got && - insn->displacement.got && insn->immediate.got; + return !!insn->emulate_prefix_size; } static inline insn_byte_t insn_vex_m_bits(struct insn 
*insn)
@@ -166,11 +245,33 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
 	return X86_VEX_P(insn->vex_prefix.bytes[2]);
 }
 
+static inline insn_byte_t insn_vex_w_bit(struct insn *insn)
+{
+	if (insn->vex_prefix.nbytes < 3)
+		return 0;
+	return X86_VEX_W(insn->vex_prefix.bytes[2]);
+}
+
+static inline insn_byte_t insn_xop_map_bits(struct insn *insn)
+{
+	if (insn->xop_prefix.nbytes < 3)	/* XOP is 3 bytes */
+		return 0;
+	return X86_XOP_M(insn->xop_prefix.bytes[1]);
+}
+
+static inline insn_byte_t insn_xop_p_bits(struct insn *insn)
+{
+	return X86_XOP_P(insn->vex_prefix.bytes[2]);
+}
+
 /* Get the last prefix id from last prefix or VEX prefix */
 static inline int insn_last_prefix_id(struct insn *insn)
 {
-	if (insn_is_avx(insn))
+	if (insn_is_avx_or_xop(insn)) {
+		if (avx_insn_is_xop(insn))
+			return insn_xop_p_bits(insn);
 		return insn_vex_p_bits(insn);	/* VEX_p is a SIMD prefix id */
+	}
 
 	if (insn->prefixes.bytes[3])
 		return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
@@ -208,6 +309,20 @@ static inline int insn_offset_immediate(struct insn *insn)
 	return insn_offset_displacement(insn) + insn->displacement.nbytes;
 }
 
+/**
+ * for_each_insn_prefix() -- Iterate prefixes in the instruction
+ * @insn:	Pointer to struct insn.
+ * @prefix:	Prefix byte.
+ *
+ * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix
+ * in turn; the cursor variable idx is declared inside the macro itself, so
+ * it is not passed in and must not be touched by the loop body.
+ * Since prefixes.nbytes can be bigger than 4 if some prefixes
+ * are repeated, it cannot be used for looping over the prefixes.
+ */
+#define for_each_insn_prefix(insn, prefix)	\
+	for (int idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+
 #define POP_SS_OPCODE 0x1f
 #define MOV_SREG_OPCODE 0x8e
 
diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h
index f5a796da07f8..e48a00b3311d 100644
--- a/arch/x86/include/asm/inst.h
+++ b/arch/x86/include/asm/inst.h
@@ -6,13 +6,12 @@
 #ifndef X86_ASM_INST_H
 #define X86_ASM_INST_H
 
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
 
 #define REG_NUM_INVALID		100
 
 #define REG_TYPE_R32		0
 #define REG_TYPE_R64		1
-#define REG_TYPE_XMM		2
 #define REG_TYPE_INVALID	100
 
 	.macro R32_NUM opd r32
@@ -123,77 +122,18 @@
 #endif
 	.endm
 
-	.macro XMM_NUM opd xmm
-	\opd = REG_NUM_INVALID
-	.ifc \xmm,%xmm0
-	\opd = 0
-	.endif
-	.ifc \xmm,%xmm1
-	\opd = 1
-	.endif
-	.ifc \xmm,%xmm2
-	\opd = 2
-	.endif
-	.ifc \xmm,%xmm3
-	\opd = 3
-	.endif
-	.ifc \xmm,%xmm4
-	\opd = 4
-	.endif
-	.ifc \xmm,%xmm5
-	\opd = 5
-	.endif
-	.ifc \xmm,%xmm6
-	\opd = 6
-	.endif
-	.ifc \xmm,%xmm7
-	\opd = 7
-	.endif
-	.ifc \xmm,%xmm8
-	\opd = 8
-	.endif
-	.ifc \xmm,%xmm9
-	\opd = 9
-	.endif
-	.ifc \xmm,%xmm10
-	\opd = 10
-	.endif
-	.ifc \xmm,%xmm11
-	\opd = 11
-	.endif
-	.ifc \xmm,%xmm12
-	\opd = 12
-	.endif
-	.ifc \xmm,%xmm13
-	\opd = 13
-	.endif
-	.ifc \xmm,%xmm14
-	\opd = 14
-	.endif
-	.ifc \xmm,%xmm15
-	\opd = 15
-	.endif
-	.endm
-
 	.macro REG_TYPE type reg
 	R32_NUM reg_type_r32 \reg
 	R64_NUM reg_type_r64 \reg
-	XMM_NUM reg_type_xmm \reg
 	.if reg_type_r64 <> REG_NUM_INVALID
 	\type = REG_TYPE_R64
 	.elseif reg_type_r32 <> REG_NUM_INVALID
 	\type = REG_TYPE_R32
-	.elseif reg_type_xmm <> REG_NUM_INVALID
-	\type = REG_TYPE_XMM
 	.else
 	\type = REG_TYPE_INVALID
 	.endif
 	.endm
 
-	.macro PFX_OPD_SIZE
-	.byte 0x66
-	.endm
-
 	.macro PFX_REX opd1 opd2 W=0
 	.if ((\opd1 | \opd2) & 8) || \W
 	.byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3)
@@ -203,109 +143,6 @@
 	.macro MODRM mod opd1 opd2
 	.byte \mod | (\opd1 &
7) << 3) .endm - - .macro PSHUFB_XMM xmm1 xmm2 - XMM_NUM pshufb_opd1 \xmm1 - XMM_NUM pshufb_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX pshufb_opd1 pshufb_opd2 - .byte 0x0f, 0x38, 0x00 - MODRM 0xc0 pshufb_opd1 pshufb_opd2 - .endm - - .macro PCLMULQDQ imm8 xmm1 xmm2 - XMM_NUM clmul_opd1 \xmm1 - XMM_NUM clmul_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX clmul_opd1 clmul_opd2 - .byte 0x0f, 0x3a, 0x44 - MODRM 0xc0 clmul_opd1 clmul_opd2 - .byte \imm8 - .endm - - .macro PEXTRD imm8 xmm gpr - R32_NUM extrd_opd1 \gpr - XMM_NUM extrd_opd2 \xmm - PFX_OPD_SIZE - PFX_REX extrd_opd1 extrd_opd2 - .byte 0x0f, 0x3a, 0x16 - MODRM 0xc0 extrd_opd1 extrd_opd2 - .byte \imm8 - .endm - - .macro AESKEYGENASSIST rcon xmm1 xmm2 - XMM_NUM aeskeygen_opd1 \xmm1 - XMM_NUM aeskeygen_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX aeskeygen_opd1 aeskeygen_opd2 - .byte 0x0f, 0x3a, 0xdf - MODRM 0xc0 aeskeygen_opd1 aeskeygen_opd2 - .byte \rcon - .endm - - .macro AESIMC xmm1 xmm2 - XMM_NUM aesimc_opd1 \xmm1 - XMM_NUM aesimc_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX aesimc_opd1 aesimc_opd2 - .byte 0x0f, 0x38, 0xdb - MODRM 0xc0 aesimc_opd1 aesimc_opd2 - .endm - - .macro AESENC xmm1 xmm2 - XMM_NUM aesenc_opd1 \xmm1 - XMM_NUM aesenc_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX aesenc_opd1 aesenc_opd2 - .byte 0x0f, 0x38, 0xdc - MODRM 0xc0 aesenc_opd1 aesenc_opd2 - .endm - - .macro AESENCLAST xmm1 xmm2 - XMM_NUM aesenclast_opd1 \xmm1 - XMM_NUM aesenclast_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX aesenclast_opd1 aesenclast_opd2 - .byte 0x0f, 0x38, 0xdd - MODRM 0xc0 aesenclast_opd1 aesenclast_opd2 - .endm - - .macro AESDEC xmm1 xmm2 - XMM_NUM aesdec_opd1 \xmm1 - XMM_NUM aesdec_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX aesdec_opd1 aesdec_opd2 - .byte 0x0f, 0x38, 0xde - MODRM 0xc0 aesdec_opd1 aesdec_opd2 - .endm - - .macro AESDECLAST xmm1 xmm2 - XMM_NUM aesdeclast_opd1 \xmm1 - XMM_NUM aesdeclast_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX aesdeclast_opd1 aesdeclast_opd2 - .byte 0x0f, 0x38, 0xdf - MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2 - .endm - - .macro MOVQ_R64_XMM opd1 opd2 - REG_TYPE movq_r64_xmm_opd1_type \opd1 - .if movq_r64_xmm_opd1_type == REG_TYPE_XMM - XMM_NUM movq_r64_xmm_opd1 \opd1 - R64_NUM movq_r64_xmm_opd2 \opd2 - .else - R64_NUM movq_r64_xmm_opd1 \opd1 - XMM_NUM movq_r64_xmm_opd2 \opd2 - .endif - PFX_OPD_SIZE - PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1 - .if movq_r64_xmm_opd1_type == REG_TYPE_XMM - .byte 0x0f, 0x7e - .else - .byte 0x0f, 0x6e - .endif - MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2 - .endm #endif #endif diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 0dd6b0f4000e..950bfd006905 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -5,89 +5,223 @@ /* * "Big Core" Processors (Branded as Core, Xeon, etc...) * - * The "_X" parts are generally the EP and EX Xeons, or the - * "Extreme" ones, like Broadwell-E. - * * While adding a new CPUID for a new microarchitecture, add a new * group to keep logically sorted out in chronological order. Within * that group keep the CPUID for the variants sorted by model number. + * + * The defined symbol names have the following form: + * INTEL_{OPTFAMILY}_{MICROARCH}{OPTDIFF} + * where: + * OPTFAMILY Describes the family of CPUs that this belongs to. Default + * is assumed to be "_CORE" (and should be omitted). Other values + * currently in use are _ATOM and _XEON_PHI + * MICROARCH Is the code name for the micro-architecture for this core. + * N.B. Not the platform name. + * OPTDIFF If needed, a short string to differentiate by market segment. 
+ * + * Common OPTDIFFs: + * + * - regular client parts + * _L - regular mobile parts + * _G - parts with extra graphics on + * _X - regular server parts + * _D - micro server parts + * _N,_P - other mobile parts + * _H - premium mobile parts + * _S - other client parts + * + * Historical OPTDIFFs: + * + * _EP - 2 socket server parts + * _EX - 4+ socket server parts + * + * The #define line may optionally include a comment including platform or core + * names. An exception is made for skylake/kabylake where steppings seem to have gotten + * their own names :-( */ -#define INTEL_FAM6_CORE_YONAH 0x0E +#define IFM(_fam, _model) VFM_MAKE(X86_VENDOR_INTEL, _fam, _model) + +/* Wildcard match so X86_MATCH_VFM(ANY) works */ +#define INTEL_ANY IFM(X86_FAMILY_ANY, X86_MODEL_ANY) + +/* Family 5 */ +#define INTEL_FAM5_START IFM(5, 0x00) /* Notational marker, also P5 A-step */ +#define INTEL_PENTIUM_75 IFM(5, 0x02) /* P54C */ +#define INTEL_PENTIUM_MMX IFM(5, 0x04) /* P55C */ +#define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */ + +/* Family 6, 18, 19 */ +#define INTEL_PENTIUM_PRO IFM(6, 0x01) +#define INTEL_PENTIUM_II_KLAMATH IFM(6, 0x03) +#define INTEL_PENTIUM_III_DESCHUTES IFM(6, 0x05) +#define INTEL_PENTIUM_III_TUALATIN IFM(6, 0x0B) +#define INTEL_PENTIUM_M_DOTHAN IFM(6, 0x0D) + +#define INTEL_CORE_YONAH IFM(6, 0x0E) + +#define INTEL_CORE2_MEROM IFM(6, 0x0F) +#define INTEL_CORE2_MEROM_L IFM(6, 0x16) +#define INTEL_CORE2_PENRYN IFM(6, 0x17) +#define INTEL_CORE2_DUNNINGTON IFM(6, 0x1D) + +#define INTEL_NEHALEM IFM(6, 0x1E) +#define INTEL_NEHALEM_G IFM(6, 0x1F) /* Auburndale / Havendale */ +#define INTEL_NEHALEM_EP IFM(6, 0x1A) +#define INTEL_NEHALEM_EX IFM(6, 0x2E) + +#define INTEL_WESTMERE IFM(6, 0x25) +#define INTEL_WESTMERE_EP IFM(6, 0x2C) +#define INTEL_WESTMERE_EX IFM(6, 0x2F) + +#define INTEL_SANDYBRIDGE IFM(6, 0x2A) +#define INTEL_SANDYBRIDGE_X IFM(6, 0x2D) +#define INTEL_IVYBRIDGE IFM(6, 0x3A) +#define INTEL_IVYBRIDGE_X IFM(6, 0x3E) + +#define INTEL_HASWELL IFM(6, 0x3C) +#define INTEL_HASWELL_X IFM(6, 0x3F) +#define INTEL_HASWELL_L IFM(6, 0x45) +#define INTEL_HASWELL_G IFM(6, 0x46) + +#define INTEL_BROADWELL IFM(6, 0x3D) +#define INTEL_BROADWELL_G IFM(6, 0x47) +#define INTEL_BROADWELL_X IFM(6, 0x4F) +#define INTEL_BROADWELL_D IFM(6, 0x56) + +#define INTEL_SKYLAKE_L IFM(6, 0x4E) /* Sky Lake */ +#define INTEL_SKYLAKE IFM(6, 0x5E) /* Sky Lake */ +#define INTEL_SKYLAKE_X IFM(6, 0x55) /* Sky Lake */ +/* CASCADELAKE_X 0x55 Sky Lake -- s: 7 */ +/* COOPERLAKE_X 0x55 Sky Lake -- s: 11 */ + +#define INTEL_KABYLAKE_L IFM(6, 0x8E) /* Sky Lake */ +/* AMBERLAKE_L 0x8E Sky Lake -- s: 9 */ +/* COFFEELAKE_L 0x8E Sky Lake -- s: 10 */ +/* WHISKEYLAKE_L 0x8E Sky Lake -- s: 11,12 */ + +#define INTEL_KABYLAKE IFM(6, 0x9E) /* Sky Lake */ +/* COFFEELAKE 0x9E Sky Lake -- s: 10-13 */ + +#define INTEL_COMETLAKE IFM(6, 0xA5) /* Sky Lake */ +#define INTEL_COMETLAKE_L IFM(6, 0xA6) /* Sky Lake */ + +#define INTEL_CANNONLAKE_L IFM(6, 0x66) /* Palm Cove */ -#define INTEL_FAM6_CORE2_MEROM 0x0F -#define INTEL_FAM6_CORE2_MEROM_L 0x16 -#define INTEL_FAM6_CORE2_PENRYN 0x17 -#define INTEL_FAM6_CORE2_DUNNINGTON 0x1D +#define INTEL_ICELAKE_X IFM(6, 0x6A) /* Sunny Cove */ +#define INTEL_ICELAKE_D IFM(6, 0x6C) /* Sunny Cove */ +#define INTEL_ICELAKE IFM(6, 0x7D) /* Sunny Cove */ +#define INTEL_ICELAKE_L IFM(6, 0x7E) /* Sunny Cove */ +#define INTEL_ICELAKE_NNPI IFM(6, 0x9D) /* Sunny Cove */ -#define INTEL_FAM6_NEHALEM 0x1E -#define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */ -#define INTEL_FAM6_NEHALEM_EP 0x1A 
-#define INTEL_FAM6_NEHALEM_EX 0x2E +#define INTEL_ROCKETLAKE IFM(6, 0xA7) /* Cypress Cove */ -#define INTEL_FAM6_WESTMERE 0x25 -#define INTEL_FAM6_WESTMERE_EP 0x2C -#define INTEL_FAM6_WESTMERE_EX 0x2F +#define INTEL_TIGERLAKE_L IFM(6, 0x8C) /* Willow Cove */ +#define INTEL_TIGERLAKE IFM(6, 0x8D) /* Willow Cove */ -#define INTEL_FAM6_SANDYBRIDGE 0x2A -#define INTEL_FAM6_SANDYBRIDGE_X 0x2D -#define INTEL_FAM6_IVYBRIDGE 0x3A -#define INTEL_FAM6_IVYBRIDGE_X 0x3E +#define INTEL_SAPPHIRERAPIDS_X IFM(6, 0x8F) /* Golden Cove */ -#define INTEL_FAM6_HASWELL_CORE 0x3C -#define INTEL_FAM6_HASWELL_X 0x3F -#define INTEL_FAM6_HASWELL_ULT 0x45 -#define INTEL_FAM6_HASWELL_GT3E 0x46 +#define INTEL_EMERALDRAPIDS_X IFM(6, 0xCF) /* Raptor Cove */ -#define INTEL_FAM6_BROADWELL_CORE 0x3D -#define INTEL_FAM6_BROADWELL_GT3E 0x47 -#define INTEL_FAM6_BROADWELL_X 0x4F -#define INTEL_FAM6_BROADWELL_XEON_D 0x56 +#define INTEL_GRANITERAPIDS_X IFM(6, 0xAD) /* Redwood Cove */ +#define INTEL_GRANITERAPIDS_D IFM(6, 0xAE) -#define INTEL_FAM6_SKYLAKE_MOBILE 0x4E -#define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E -#define INTEL_FAM6_SKYLAKE_X 0x55 -#define INTEL_FAM6_KABYLAKE_MOBILE 0x8E -#define INTEL_FAM6_KABYLAKE_DESKTOP 0x9E +#define INTEL_DIAMONDRAPIDS_X IFM(19, 0x01) /* Panther Cove */ -#define INTEL_FAM6_CANNONLAKE_MOBILE 0x66 +#define INTEL_BARTLETTLAKE IFM(6, 0xD7) /* Raptor Cove */ -/* "Small Core" Processors (Atom) */ +/* "Hybrid" Processors (P-Core/E-Core) */ -#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ -#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */ +#define INTEL_LAKEFIELD IFM(6, 0x8A) /* Sunny Cove / Tremont */ -#define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */ -#define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */ -#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */ +#define INTEL_ALDERLAKE IFM(6, 0x97) /* Golden Cove / Gracemont */ +#define INTEL_ALDERLAKE_L IFM(6, 0x9A) /* Golden Cove / Gracemont */ -#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */ -#define INTEL_FAM6_ATOM_SILVERMONT_X 0x4D /* Avaton, Rangely */ -#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */ +#define INTEL_RAPTORLAKE IFM(6, 0xB7) /* Raptor Cove / Enhanced Gracemont */ +#define INTEL_RAPTORLAKE_P IFM(6, 0xBA) +#define INTEL_RAPTORLAKE_S IFM(6, 0xBF) -#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */ -#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */ +#define INTEL_METEORLAKE IFM(6, 0xAC) /* Redwood Cove / Crestmont */ +#define INTEL_METEORLAKE_L IFM(6, 0xAA) -#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */ -#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */ -#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ +#define INTEL_ARROWLAKE_H IFM(6, 0xC5) /* Lion Cove / Skymont */ +#define INTEL_ARROWLAKE IFM(6, 0xC6) +#define INTEL_ARROWLAKE_U IFM(6, 0xB5) + +#define INTEL_LUNARLAKE_M IFM(6, 0xBD) /* Lion Cove / Skymont */ + +#define INTEL_PANTHERLAKE_L IFM(6, 0xCC) /* Cougar Cove / Darkmont */ + +#define INTEL_WILDCATLAKE_L IFM(6, 0xD5) + +#define INTEL_NOVALAKE IFM(18, 0x01) /* Coyote Cove / Arctic Wolf */ +#define INTEL_NOVALAKE_L IFM(18, 0x03) /* Coyote Cove / Arctic Wolf */ + +/* "Small Core" Processors (Atom/E-Core) */ + +#define INTEL_ATOM_BONNELL IFM(6, 0x1C) /* Diamondville, Pineview */ +#define INTEL_ATOM_BONNELL_MID IFM(6, 0x26) /* Silverthorne, Lincroft */ + +#define INTEL_ATOM_SALTWELL IFM(6, 0x36) /* Cedarview */ +#define INTEL_ATOM_SALTWELL_MID IFM(6, 0x27) /* Penwell */ +#define 
INTEL_ATOM_SALTWELL_TABLET	IFM(6, 0x35) /* Cloverview */
+
+#define INTEL_ATOM_SILVERMONT		IFM(6, 0x37) /* Bay Trail, Valleyview */
+#define INTEL_ATOM_SILVERMONT_D		IFM(6, 0x4D) /* Avaton, Rangely */
+#define INTEL_ATOM_SILVERMONT_MID	IFM(6, 0x4A) /* Merrifield */
+#define INTEL_ATOM_SILVERMONT_MID2	IFM(6, 0x5A) /* Anniedale */
+
+#define INTEL_ATOM_AIRMONT		IFM(6, 0x4C) /* Cherry Trail, Braswell */
+#define INTEL_ATOM_AIRMONT_NP		IFM(6, 0x75) /* Lightning Mountain */
+
+#define INTEL_ATOM_GOLDMONT		IFM(6, 0x5C) /* Apollo Lake */
+#define INTEL_ATOM_GOLDMONT_D		IFM(6, 0x5F) /* Denverton */
+
+/* Note: the micro-architecture is "Goldmont Plus" */
+#define INTEL_ATOM_GOLDMONT_PLUS	IFM(6, 0x7A) /* Gemini Lake */
+
+#define INTEL_ATOM_TREMONT_D		IFM(6, 0x86) /* Jacobsville */
+#define INTEL_ATOM_TREMONT		IFM(6, 0x96) /* Elkhart Lake */
+#define INTEL_ATOM_TREMONT_L		IFM(6, 0x9C) /* Jasper Lake */
+
+#define INTEL_ATOM_GRACEMONT		IFM(6, 0xBE) /* Alderlake N */
+
+#define INTEL_ATOM_CRESTMONT_X		IFM(6, 0xAF) /* Sierra Forest */
+#define INTEL_ATOM_CRESTMONT		IFM(6, 0xB6) /* Grand Ridge */
+
+#define INTEL_ATOM_DARKMONT_X		IFM(6, 0xDD) /* Clearwater Forest */
 
 /* Xeon Phi */
 
-#define INTEL_FAM6_XEON_PHI_KNL		0x57 /* Knights Landing */
-#define INTEL_FAM6_XEON_PHI_KNM		0x85 /* Knights Mill */
-
-/* Useful macros */
-#define INTEL_CPU_FAM_ANY(_family, _model, _driver_data) \
-{ \
-	.vendor		= X86_VENDOR_INTEL, \
-	.family		= _family, \
-	.model		= _model, \
-	.feature	= X86_FEATURE_ANY, \
-	.driver_data	= (kernel_ulong_t)&_driver_data \
-}
-
-#define INTEL_CPU_FAM6(_model, _driver_data) \
-	INTEL_CPU_FAM_ANY(6, INTEL_FAM6_##_model, _driver_data)
+#define INTEL_XEON_PHI_KNL		IFM(6, 0x57) /* Knights Landing */
+#define INTEL_XEON_PHI_KNM		IFM(6, 0x85) /* Knights Mill */
+
+/* Notational marker denoting the last Family 6 model */
+#define INTEL_FAM6_LAST			IFM(6, 0xFF)
+
+/* Family 15 - NetBurst */
+#define INTEL_P4_WILLAMETTE		IFM(15, 0x01) /* Also Xeon Foster */
+#define INTEL_P4_PRESCOTT		IFM(15, 0x03)
+#define INTEL_P4_PRESCOTT_2M		IFM(15, 0x04)
+#define INTEL_P4_CEDARMILL		IFM(15, 0x06) /* Also Xeon Dempsey */
+
+/*
+ * Intel CPU core types
+ *
+ * CPUID.1AH.EAX[31:0] uniquely identifies the microarchitecture
+ * of the core. Bits [31:24] indicate its core type (Core or Atom)
+ * and bits [23:0] indicate the native model ID of the core.
+ * Core type and native model ID are defined in the enumerations below.
+ */
+enum intel_cpu_type {
+	INTEL_CPU_TYPE_UNKNOWN,
+	INTEL_CPU_TYPE_ATOM = 0x20,
+	INTEL_CPU_TYPE_CORE = 0x40,
+};
+
+enum intel_native_id {
+	INTEL_ATOM_CMT_NATIVE_ID = 0x2,	/* Crestmont */
+	INTEL_ATOM_SKT_NATIVE_ID = 0x3,	/* Skymont */
+};
 
 #endif /* _ASM_X86_INTEL_FAMILY_H */
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index 52f815a80539..a3abdcd89a32 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -1,19 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * intel-mid.h: Intel MID specific setup code
+ * Intel MID specific setup code
  *
- * (C) Copyright 2009 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
+ * (C) Copyright 2009, 2021 Intel Corporation */ #ifndef _ASM_X86_INTEL_MID_H #define _ASM_X86_INTEL_MID_H -#include <linux/sfi.h> #include <linux/pci.h> -#include <linux/platform_device.h> extern int intel_mid_pci_init(void); extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state); @@ -26,109 +20,4 @@ extern void intel_mid_pwr_power_off(void); extern int intel_mid_pwr_get_lss_id(struct pci_dev *pdev); -extern int get_gpio_by_name(const char *name); -extern int __init sfi_parse_mrtc(struct sfi_table_header *table); -extern int __init sfi_parse_mtmr(struct sfi_table_header *table); -extern int sfi_mrtc_num; -extern struct sfi_rtc_table_entry sfi_mrtc_array[]; - -/* - * Here defines the array of devices platform data that IAFW would export - * through SFI "DEVS" table, we use name and type to match the device and - * its platform data. - */ -struct devs_id { - char name[SFI_NAME_LEN + 1]; - u8 type; - u8 delay; - u8 msic; - void *(*get_platform_data)(void *info); -}; - -#define sfi_device(i) \ - static const struct devs_id *const __intel_mid_sfi_##i##_dev __used \ - __attribute__((__section__(".x86_intel_mid_dev.init"))) = &i - -/** -* struct mid_sd_board_info - template for SD device creation -* @name: identifies the driver -* @bus_num: board-specific identifier for a given SD controller -* @max_clk: the maximum frequency device supports -* @platform_data: the particular data stored there is driver-specific -*/ -struct mid_sd_board_info { - char name[SFI_NAME_LEN]; - int bus_num; - unsigned short addr; - u32 max_clk; - void *platform_data; -}; - -/* - * Medfield is the follow-up of Moorestown, it combines two chip solution into - * one. Other than that it also added always-on and constant tsc and lapic - * timers. Medfield is the platform name, and the chip name is called Penwell - * we treat Medfield/Penwell as a variant of Moorestown. Penwell can be - * identified via MSRs. 
- */
-enum intel_mid_cpu_type {
-	/* 1 was Moorestown */
-	INTEL_MID_CPU_CHIP_PENWELL = 2,
-	INTEL_MID_CPU_CHIP_CLOVERVIEW,
-	INTEL_MID_CPU_CHIP_TANGIER,
-};
-
-extern enum intel_mid_cpu_type __intel_mid_cpu_chip;
-
-#ifdef CONFIG_X86_INTEL_MID
-
-static inline enum intel_mid_cpu_type intel_mid_identify_cpu(void)
-{
-	return __intel_mid_cpu_chip;
-}
-
-static inline bool intel_mid_has_msic(void)
-{
-	return (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_PENWELL);
-}
-
-#else /* !CONFIG_X86_INTEL_MID */
-
-#define intel_mid_identify_cpu()	0
-#define intel_mid_has_msic()	0
-
-#endif /* !CONFIG_X86_INTEL_MID */
-
-enum intel_mid_timer_options {
-	INTEL_MID_TIMER_DEFAULT,
-	INTEL_MID_TIMER_APBT_ONLY,
-	INTEL_MID_TIMER_LAPIC_APBT,
-};
-
-extern enum intel_mid_timer_options intel_mid_timer_options;
-
-/* Bus Select SoC Fuse value */
-#define BSEL_SOC_FUSE_MASK	0x7
-/* FSB 133MHz */
-#define BSEL_SOC_FUSE_001	0x1
-/* FSB 100MHz */
-#define BSEL_SOC_FUSE_101	0x5
-/* FSB 83MHz */
-#define BSEL_SOC_FUSE_111	0x7
-
-#define SFI_MTMR_MAX_NUM	8
-#define SFI_MRTC_MAX	8
-
-extern void intel_scu_devices_create(void);
-extern void intel_scu_devices_destroy(void);
-
-/* VRTC timer */
-#define MRST_VRTC_MAP_SZ	1024
-/* #define MRST_VRTC_PGOFFSET	0xc00 */
-
-extern void intel_mid_rtc_init(void);
-
-/* The offset for the mapping of global gpio pin to irq */
-#define INTEL_MID_IRQ_OFFSET 0x100
-
 #endif /* _ASM_X86_INTEL_MID_H */
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
index ae26df1c2789..695f87efbeb8 100644
--- a/arch/x86/include/asm/intel_ds.h
+++ b/arch/x86/include/asm/intel_ds.h
@@ -4,11 +4,21 @@
 #include <linux/percpu-defs.h>
 
 #define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)
+#define PEBS_BUFFER_SHIFT	4
+#define PEBS_BUFFER_SIZE	(PAGE_SIZE << PEBS_BUFFER_SHIFT)
+
+/*
+ * The largest PEBS record could consume a page, so make sure that
+ * at least one record can still be written after the PMI triggers.
+ */
+#define ARCH_PEBS_THRESH_MULTI	((PEBS_BUFFER_SIZE - PAGE_SIZE) >> PEBS_BUFFER_SHIFT)
+#define ARCH_PEBS_THRESH_SINGLE	1
 
 /* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS		8
-#define MAX_FIXED_PEBS_EVENTS	3
+#define MAX_PEBS_EVENTS_FMT4	8
+#define MAX_PEBS_EVENTS		32
+#define MAX_PEBS_EVENTS_MASK	GENMASK_ULL(MAX_PEBS_EVENTS - 1, 0)
+#define MAX_FIXED_PEBS_EVENTS	16
 
 /*
  * A debug store configuration.
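A usage note on the insn.h changes earlier in this diff: the insn_get_*() helpers now return an error code, and insn_decode() subsumes the old kernel_insn_init()/insn_get_length() pairing. Below is a minimal sketch of a caller, assuming the patched <asm/insn.h> is in scope; dump_insn() and its pr_info() output are illustrative only, not part of the patch:

	/* Hypothetical helper: decode one instruction and dump a few fields. */
	#include <linux/printk.h>
	#include <asm/insn.h>

	static int dump_insn(const void *kaddr, int buf_len)
	{
		struct insn insn, *ip = &insn;
		insn_byte_t prefix;
		int ret;

		/* INSN_MODE_KERN picks 32- vs 64-bit from the kernel build. */
		ret = insn_decode(ip, kaddr, buf_len, INSN_MODE_KERN);
		if (ret < 0)
			return ret;	/* malformed or truncated instruction */

		/* Walk legacy prefixes; the cursor lives inside the macro. */
		for_each_insn_prefix(ip, prefix)
			pr_info("legacy prefix 0x%02x\n", prefix);

		pr_info("opcode1 0x%02x, immediate at offset %d\n",
			ip->opcode.bytes[0], insn_offset_immediate(ip));

		return 0;
	}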
diff --git a/arch/x86/include/asm/intel_mid_vrtc.h b/arch/x86/include/asm/intel_mid_vrtc.h
deleted file mode 100644
index 0b44b1abe4d9..000000000000
--- a/arch/x86/include/asm/intel_mid_vrtc.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _INTEL_MID_VRTC_H
-#define _INTEL_MID_VRTC_H
-
-extern unsigned char vrtc_cmos_read(unsigned char reg);
-extern void vrtc_cmos_write(unsigned char val, unsigned char reg);
-extern void vrtc_get_time(struct timespec64 *now);
-extern int vrtc_set_mmss(const struct timespec64 *now);
-
-#endif
diff --git a/arch/x86/include/asm/intel_pconfig.h b/arch/x86/include/asm/intel_pconfig.h
deleted file mode 100644
index 3cb002b1d0f9..000000000000
--- a/arch/x86/include/asm/intel_pconfig.h
+++ /dev/null
@@ -1,65 +0,0 @@
-#ifndef _ASM_X86_INTEL_PCONFIG_H
-#define _ASM_X86_INTEL_PCONFIG_H
-
-#include <asm/asm.h>
-#include <asm/processor.h>
-
-enum pconfig_target {
-	INVALID_TARGET = 0,
-	MKTME_TARGET = 1,
-	PCONFIG_TARGET_NR
-};
-
-int pconfig_target_supported(enum pconfig_target target);
-
-enum pconfig_leaf {
-	MKTME_KEY_PROGRAM = 0,
-	PCONFIG_LEAF_INVALID,
-};
-
-#define PCONFIG ".byte 0x0f, 0x01, 0xc5"
-
-/* Defines and structure for MKTME_KEY_PROGRAM of PCONFIG instruction */
-
-/* mktme_key_program::keyid_ctrl COMMAND, bits [7:0] */
-#define MKTME_KEYID_SET_KEY_DIRECT	0
-#define MKTME_KEYID_SET_KEY_RANDOM	1
-#define MKTME_KEYID_CLEAR_KEY		2
-#define MKTME_KEYID_NO_ENCRYPT		3
-
-/* mktme_key_program::keyid_ctrl ENC_ALG, bits [23:8] */
-#define MKTME_AES_XTS_128	(1 << 8)
-
-/* Return codes from the PCONFIG MKTME_KEY_PROGRAM */
-#define MKTME_PROG_SUCCESS	0
-#define MKTME_INVALID_PROG_CMD	1
-#define MKTME_ENTROPY_ERROR	2
-#define MKTME_INVALID_KEYID	3
-#define MKTME_INVALID_ENC_ALG	4
-#define MKTME_DEVICE_BUSY	5
-
-/* Hardware requires the structure to be 256 byte aligned. Otherwise #GP(0).
*/ -struct mktme_key_program { - u16 keyid; - u32 keyid_ctrl; - u8 __rsvd[58]; - u8 key_field_1[64]; - u8 key_field_2[64]; -} __packed __aligned(256); - -static inline int mktme_key_program(struct mktme_key_program *key_program) -{ - unsigned long rax = MKTME_KEY_PROGRAM; - - if (!pconfig_target_supported(MKTME_TARGET)) - return -ENXIO; - - asm volatile(PCONFIG - : "=a" (rax), "=b" (key_program) - : "0" (rax), "1" (key_program) - : "memory", "cc"); - - return rax; -} - -#endif /* _ASM_X86_INTEL_PCONFIG_H */ diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h deleted file mode 100644 index 9e7adcdbe031..000000000000 --- a/arch/x86/include/asm/intel_pmc_ipc.h +++ /dev/null @@ -1,91 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_INTEL_PMC_IPC_H_ -#define _ASM_X86_INTEL_PMC_IPC_H_ - -/* Commands */ -#define PMC_IPC_PMIC_ACCESS 0xFF -#define PMC_IPC_PMIC_ACCESS_READ 0x0 -#define PMC_IPC_PMIC_ACCESS_WRITE 0x1 -#define PMC_IPC_USB_PWR_CTRL 0xF0 -#define PMC_IPC_PMIC_BLACKLIST_SEL 0xEF -#define PMC_IPC_PHY_CONFIG 0xEE -#define PMC_IPC_NORTHPEAK_CTRL 0xED -#define PMC_IPC_PM_DEBUG 0xEC -#define PMC_IPC_PMC_TELEMTRY 0xEB -#define PMC_IPC_PMC_FW_MSG_CTRL 0xEA - -/* IPC return code */ -#define IPC_ERR_NONE 0 -#define IPC_ERR_CMD_NOT_SUPPORTED 1 -#define IPC_ERR_CMD_NOT_SERVICED 2 -#define IPC_ERR_UNABLE_TO_SERVICE 3 -#define IPC_ERR_CMD_INVALID 4 -#define IPC_ERR_CMD_FAILED 5 -#define IPC_ERR_EMSECURITY 6 -#define IPC_ERR_UNSIGNEDKERNEL 7 - -/* GCR reg offsets from gcr base*/ -#define PMC_GCR_PMC_CFG_REG 0x08 -#define PMC_GCR_TELEM_DEEP_S0IX_REG 0x78 -#define PMC_GCR_TELEM_SHLW_S0IX_REG 0x80 - -#if IS_ENABLED(CONFIG_INTEL_PMC_IPC) - -int intel_pmc_ipc_simple_command(int cmd, int sub); -int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, - u32 *out, u32 outlen, u32 dptr, u32 sptr); -int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, - u32 *out, u32 outlen); -int intel_pmc_s0ix_counter_read(u64 *data); -int intel_pmc_gcr_read(u32 offset, u32 *data); -int intel_pmc_gcr_read64(u32 offset, u64 *data); -int intel_pmc_gcr_write(u32 offset, u32 data); -int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val); - -#else - -static inline int intel_pmc_ipc_simple_command(int cmd, int sub) -{ - return -EINVAL; -} - -static inline int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, - u32 *out, u32 outlen, u32 dptr, u32 sptr) -{ - return -EINVAL; -} - -static inline int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, - u32 *out, u32 outlen) -{ - return -EINVAL; -} - -static inline int intel_pmc_s0ix_counter_read(u64 *data) -{ - return -EINVAL; -} - -static inline int intel_pmc_gcr_read(u32 offset, u32 *data) -{ - return -EINVAL; -} - -static inline int intel_pmc_gcr_read64(u32 offset, u64 *data) -{ - return -EINVAL; -} - -static inline int intel_pmc_gcr_write(u32 offset, u32 data) -{ - return -EINVAL; -} - -static inline int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val) -{ - return -EINVAL; -} - -#endif /*CONFIG_INTEL_PMC_IPC*/ - -#endif diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h index 634f99b1dc22..c796e9bc98b6 100644 --- a/arch/x86/include/asm/intel_pt.h +++ b/arch/x86/include/asm/intel_pt.h @@ -3,7 +3,7 @@ #define _ASM_X86_INTEL_PT_H #define PT_CPUID_LEAVES 2 -#define PT_CPUID_REGS_NUM 4 /* number of regsters (eax, ebx, ecx, edx) */ +#define PT_CPUID_REGS_NUM 4 /* number of registers (eax, ebx, ecx, edx) */ enum pt_capabilities { PT_CAP_max_subleaf = 0, @@ -13,6 +13,8 @@ 
enum pt_capabilities { PT_CAP_mtc, PT_CAP_ptwrite, PT_CAP_power_event_trace, + PT_CAP_event_trace, + PT_CAP_tnt_disable, PT_CAP_topa_output, PT_CAP_topa_multiple_entries, PT_CAP_single_range_output, @@ -28,10 +30,12 @@ enum pt_capabilities { void cpu_emergency_stop_pt(void); extern u32 intel_pt_validate_hw_cap(enum pt_capabilities cap); extern u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities cap); +extern int is_intel_pt_event(struct perf_event *event); #else static inline void cpu_emergency_stop_pt(void) {} static inline u32 intel_pt_validate_hw_cap(enum pt_capabilities cap) { return 0; } static inline u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability) { return 0; } +static inline int is_intel_pt_event(struct perf_event *event) { return 0; } #endif #endif /* _ASM_X86_INTEL_PT_H */ diff --git a/arch/x86/include/asm/intel_punit_ipc.h b/arch/x86/include/asm/intel_punit_ipc.h index ce16da719596..1f9b5d225912 100644 --- a/arch/x86/include/asm/intel_punit_ipc.h +++ b/arch/x86/include/asm/intel_punit_ipc.h @@ -80,17 +80,10 @@ typedef enum { #if IS_ENABLED(CONFIG_INTEL_PUNIT_IPC) -int intel_punit_ipc_simple_command(int cmd, int para1, int para2); int intel_punit_ipc_command(u32 cmd, u32 para1, u32 para2, u32 *in, u32 *out); #else -static inline int intel_punit_ipc_simple_command(int cmd, - int para1, int para2) -{ - return -ENODEV; -} - static inline int intel_punit_ipc_command(u32 cmd, u32 para1, u32 para2, u32 *in, u32 *out) { diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h deleted file mode 100644 index 4a8c6e817398..000000000000 --- a/arch/x86/include/asm/intel_scu_ipc.h +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_INTEL_SCU_IPC_H_ -#define _ASM_X86_INTEL_SCU_IPC_H_ - -#include <linux/notifier.h> - -#define IPCMSG_INDIRECT_READ 0x02 -#define IPCMSG_INDIRECT_WRITE 0x05 - -#define IPCMSG_COLD_OFF 0x80 /* Only for Tangier */ - -#define IPCMSG_WARM_RESET 0xF0 -#define IPCMSG_COLD_RESET 0xF1 -#define IPCMSG_SOFT_RESET 0xF2 -#define IPCMSG_COLD_BOOT 0xF3 - -#define IPCMSG_VRTC 0xFA /* Set vRTC device */ - /* Command id associated with message IPCMSG_VRTC */ - #define IPC_CMD_VRTC_SETTIME 1 /* Set time */ - #define IPC_CMD_VRTC_SETALARM 2 /* Set alarm */ - -/* Read single register */ -int intel_scu_ipc_ioread8(u16 addr, u8 *data); - -/* Read two sequential registers */ -int intel_scu_ipc_ioread16(u16 addr, u16 *data); - -/* Read four sequential registers */ -int intel_scu_ipc_ioread32(u16 addr, u32 *data); - -/* Read a vector */ -int intel_scu_ipc_readv(u16 *addr, u8 *data, int len); - -/* Write single register */ -int intel_scu_ipc_iowrite8(u16 addr, u8 data); - -/* Write two sequential registers */ -int intel_scu_ipc_iowrite16(u16 addr, u16 data); - -/* Write four sequential registers */ -int intel_scu_ipc_iowrite32(u16 addr, u32 data); - -/* Write a vector */ -int intel_scu_ipc_writev(u16 *addr, u8 *data, int len); - -/* Update single register based on the mask */ -int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask); - -/* Issue commands to the SCU with or without data */ -int intel_scu_ipc_simple_command(int cmd, int sub); -int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen, - u32 *out, int outlen); -int intel_scu_ipc_raw_command(int cmd, int sub, u8 *in, int inlen, - u32 *out, int outlen, u32 dptr, u32 sptr); - -/* I2C control api */ -int intel_scu_ipc_i2c_cntrl(u32 addr, u32 *data); - -/* Update FW version */ -int intel_scu_ipc_fw_update(u8 *buffer, u32 
length); - -extern struct blocking_notifier_head intel_scu_notifier; - -static inline void intel_scu_notifier_add(struct notifier_block *nb) -{ - blocking_notifier_chain_register(&intel_scu_notifier, nb); -} - -static inline void intel_scu_notifier_remove(struct notifier_block *nb) -{ - blocking_notifier_chain_unregister(&intel_scu_notifier, nb); -} - -static inline int intel_scu_notifier_post(unsigned long v, void *p) -{ - return blocking_notifier_call_chain(&intel_scu_notifier, v, p); -} - -#define SCU_AVAILABLE 1 -#define SCU_DOWN 2 - -#endif diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h index 85029b58d0cd..944637a4e6de 100644 --- a/arch/x86/include/asm/intel_telemetry.h +++ b/arch/x86/include/asm/intel_telemetry.h @@ -1,17 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Intel SOC Telemetry Driver Header File * Copyright (C) 2015, Intel Corporation. * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ #ifndef INTEL_TELEMETRY_H #define INTEL_TELEMETRY_H @@ -19,6 +10,8 @@ #define TELEM_MAX_EVENTS_SRAM 28 #define TELEM_MAX_OS_ALLOCATED_EVENTS 20 +#include <linux/platform_data/x86/intel_scu_ipc.h> + enum telemetry_unit { TELEM_PSS = 0, TELEM_IOSS, @@ -49,13 +42,10 @@ struct telemetry_evtmap { struct telemetry_unit_config { struct telemetry_evtmap *telem_evts; void __iomem *regmap; - u32 ssram_base_addr; u8 ssram_evts_used; u8 curr_period; u8 max_period; u8 min_period; - u32 ssram_size; - }; struct telemetry_plt_config { @@ -63,22 +53,12 @@ struct telemetry_plt_config { struct telemetry_unit_config ioss_config; struct mutex telem_trace_lock; struct mutex telem_lock; + struct intel_pmc_dev *pmc; + struct intel_scu_ipc_dev *scu; bool telem_in_use; }; struct telemetry_core_ops { - int (*get_sampling_period)(u8 *pss_min_period, u8 *pss_max_period, - u8 *ioss_min_period, u8 *ioss_max_period); - - int (*get_eventconfig)(struct telemetry_evtconfig *pss_evtconfig, - struct telemetry_evtconfig *ioss_evtconfig, - int pss_len, int ioss_len); - - int (*update_events)(struct telemetry_evtconfig pss_evtconfig, - struct telemetry_evtconfig ioss_evtconfig); - - int (*set_sampling_period)(u8 pss_period, u8 ioss_period); - int (*get_trace_verbosity)(enum telemetry_unit telem_unit, u32 *verbosity); @@ -92,11 +72,6 @@ struct telemetry_core_ops { int (*read_eventlog)(enum telemetry_unit telem_unit, struct telemetry_evtlog *evtlog, int len, int log_all_evts); - - int (*add_events)(u8 num_pss_evts, u8 num_ioss_evts, - u32 *pss_evtmap, u32 *ioss_evtmap); - - int (*reset_events)(void); }; int telemetry_set_pltdata(const struct telemetry_core_ops *ops, @@ -104,40 +79,20 @@ int telemetry_set_pltdata(const struct telemetry_core_ops *ops, int telemetry_clear_pltdata(void); -int telemetry_pltconfig_valid(void); +struct telemetry_plt_config *telemetry_get_pltdata(void); int telemetry_get_evtname(enum telemetry_unit telem_unit, const char **name, int len); -int telemetry_update_events(struct telemetry_evtconfig pss_evtconfig, - struct telemetry_evtconfig ioss_evtconfig); - -int telemetry_add_events(u8 num_pss_evts, u8 num_ioss_evts, - u32 
*pss_evtmap, u32 *ioss_evtmap); - -int telemetry_reset_events(void); - -int telemetry_get_eventconfig(struct telemetry_evtconfig *pss_config, - struct telemetry_evtconfig *ioss_config, - int pss_len, int ioss_len); - int telemetry_read_events(enum telemetry_unit telem_unit, struct telemetry_evtlog *evtlog, int len); -int telemetry_raw_read_events(enum telemetry_unit telem_unit, - struct telemetry_evtlog *evtlog, int len); - int telemetry_read_eventlog(enum telemetry_unit telem_unit, struct telemetry_evtlog *evtlog, int len); int telemetry_raw_read_eventlog(enum telemetry_unit telem_unit, struct telemetry_evtlog *evtlog, int len); -int telemetry_get_sampling_period(u8 *pss_min_period, u8 *pss_max_period, - u8 *ioss_min_period, u8 *ioss_max_period); - -int telemetry_set_sampling_period(u8 pss_period, u8 ioss_period); - int telemetry_set_trace_verbosity(enum telemetry_unit telem_unit, u32 verbosity); diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h index 989cfa86de85..734482afbf81 100644 --- a/arch/x86/include/asm/invpcid.h +++ b/arch/x86/include/asm/invpcid.h @@ -12,12 +12,9 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr, * stale TLB entries and, especially if we're flushing global * mappings, we don't want the compiler to reorder any subsequent * memory accesses before the TLB flush. - * - * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and - * invpcid (%rcx), %rax in long mode. */ - asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" - : : "m" (desc), "a" (type), "c" (&desc) : "memory"); + asm volatile("invpcid %[desc], %[type]" + :: [desc] "m" (desc), [type] "r" (type) : "memory"); } #define INVPCID_TYPE_INDIV_ADDR 0 diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 686247db3106..ca309a3227c7 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -35,14 +35,14 @@ * - Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ -#define ARCH_HAS_IOREMAP_WC -#define ARCH_HAS_IOREMAP_WT - #include <linux/string.h> #include <linux/compiler.h> +#include <linux/cc_platform.h> #include <asm/page.h> #include <asm/early_ioremap.h> #include <asm/pgtable_types.h> +#include <asm/shared/io.h> +#include <asm/special_insns.h> #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ @@ -90,8 +90,6 @@ build_mmio_write(__writel, "l", unsigned int, "r", ) #define __raw_writew __writew #define __raw_writel __writel -#define mmiowb() barrier() - #ifdef CONFIG_X86_64 build_mmio_read(readq, "q", u64, "=r", :"memory") @@ -154,46 +152,32 @@ static inline void *phys_to_virt(phys_addr_t address) #define phys_to_virt phys_to_virt /* - * Change "struct page" to physical address. - */ -#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) - -/* * ISA I/O bus memory addresses are 1:1 with the physical address. * However, we truncate the address to unsigned int to avoid undesirable - * promitions in legacy drivers. + * promotions in legacy drivers. */ static inline unsigned int isa_virt_to_bus(volatile void *address) { return (unsigned int)virt_to_phys(address); } -#define isa_page_to_bus(page) ((unsigned int)page_to_phys(page)) #define isa_bus_to_virt phys_to_virt /* - * However PCI ones are not necessarily 1:1 and therefore these interfaces - * are forbidden in portable PCI drivers. - * - * Allow them on x86 for legacy drivers, though. 
- */ -#define virt_to_bus virt_to_phys -#define bus_to_virt phys_to_virt - -/* * The default ioremap() behavior is non-cached; if you need something * else, you probably want one of the following. */ -extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); -#define ioremap_nocache ioremap_nocache extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); #define ioremap_uc ioremap_uc extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); #define ioremap_cache ioremap_cache -extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val); +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, pgprot_t prot); #define ioremap_prot ioremap_prot extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size); #define ioremap_encrypted ioremap_encrypted +void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags); +#define arch_memremap_wb arch_memremap_wb + /** * ioremap - map bus memory into CPU space * @offset: bus address of the memory @@ -208,17 +192,12 @@ extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long * If the area you are trying to map is a PCI BAR you should have a * look at pci_iomap(). */ -static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) -{ - return ioremap_nocache(offset, size); -} +void __iomem *ioremap(resource_size_t offset, unsigned long size); #define ioremap ioremap extern void iounmap(volatile void __iomem *addr); #define iounmap iounmap -extern void set_iounmap_nonlazy(void); - #ifdef __KERNEL__ void memcpy_fromio(void *, const volatile void __iomem *, size_t); @@ -229,7 +208,22 @@ void memset_io(volatile void __iomem *, int, size_t); #define memcpy_toio memcpy_toio #define memset_io memset_io -#include <asm-generic/iomap.h> +#ifdef CONFIG_X86_64 +/* + * Commit 0f07496144c2 ("[PATCH] Add faster __iowrite32_copy routine for + * x86_64") says that circa 2006 rep movsl is noticeably faster than a copy + * loop. 
+ */ +static inline void __iowrite32_copy(void __iomem *to, const void *from, + size_t count) +{ + asm volatile("rep movsl" + : "=&c"(count), "=&D"(to), "=&S"(from) + : "0"(count), "1"(to), "2"(from) + : "memory"); +} +#define __iowrite32_copy __iowrite32_copy +#endif /* * ISA space is 'always mapped' on a typical x86 system, no need to @@ -264,88 +258,57 @@ static inline void slow_down_io(void) #endif -#ifdef CONFIG_AMD_MEM_ENCRYPT -#include <linux/jump_label.h> - -extern struct static_key_false sev_enable_key; -static inline bool sev_key_active(void) -{ - return static_branch_unlikely(&sev_enable_key); -} - -#else /* !CONFIG_AMD_MEM_ENCRYPT */ - -static inline bool sev_key_active(void) { return false; } - -#endif /* CONFIG_AMD_MEM_ENCRYPT */ - -#define BUILDIO(bwl, bw, type) \ -static inline void out##bwl(unsigned type value, int port) \ -{ \ - asm volatile("out" #bwl " %" #bw "0, %w1" \ - : : "a"(value), "Nd"(port)); \ -} \ - \ -static inline unsigned type in##bwl(int port) \ -{ \ - unsigned type value; \ - asm volatile("in" #bwl " %w1, %" #bw "0" \ - : "=a"(value) : "Nd"(port)); \ - return value; \ -} \ - \ -static inline void out##bwl##_p(unsigned type value, int port) \ +#define BUILDIO(bwl, type) \ +static inline void out##bwl##_p(type value, u16 port) \ { \ out##bwl(value, port); \ slow_down_io(); \ } \ \ -static inline unsigned type in##bwl##_p(int port) \ +static inline type in##bwl##_p(u16 port) \ { \ - unsigned type value = in##bwl(port); \ + type value = in##bwl(port); \ slow_down_io(); \ return value; \ } \ \ -static inline void outs##bwl(int port, const void *addr, unsigned long count) \ +static inline void outs##bwl(u16 port, const void *addr, unsigned long count) \ { \ - if (sev_key_active()) { \ - unsigned type *value = (unsigned type *)addr; \ + if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { \ + type *value = (type *)addr; \ while (count) { \ out##bwl(*value, port); \ value++; \ count--; \ } \ } else { \ - asm volatile("rep; outs" #bwl \ + asm volatile("rep outs" #bwl \ : "+S"(addr), "+c"(count) \ : "d"(port) : "memory"); \ } \ } \ \ -static inline void ins##bwl(int port, void *addr, unsigned long count) \ +static inline void ins##bwl(u16 port, void *addr, unsigned long count) \ { \ - if (sev_key_active()) { \ - unsigned type *value = (unsigned type *)addr; \ + if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { \ + type *value = (type *)addr; \ while (count) { \ *value = in##bwl(port); \ value++; \ count--; \ } \ } else { \ - asm volatile("rep; ins" #bwl \ + asm volatile("rep ins" #bwl \ : "+D"(addr), "+c"(count) \ : "d"(port) : "memory"); \ } \ } -BUILDIO(b, b, char) -BUILDIO(w, w, short) -BUILDIO(l, , int) +BUILDIO(b, u8) +BUILDIO(w, u16) +BUILDIO(l, u32) +#undef BUILDIO -#define inb inb -#define inw inw -#define inl inl #define inb_p inb_p #define inw_p inw_p #define inl_p inl_p @@ -353,9 +316,6 @@ BUILDIO(l, , int) #define insw insw #define insl insl -#define outb outb -#define outw outw -#define outl outl #define outb_p outb_p #define outw_p outw_p #define outl_p outl_p @@ -399,6 +359,7 @@ extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size) #define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc #endif +#ifdef CONFIG_AMD_MEM_ENCRYPT extern bool arch_memremap_can_ram_remap(resource_size_t offset, unsigned long size, unsigned long flags); @@ -406,5 +367,37 @@ extern bool arch_memremap_can_ram_remap(resource_size_t offset, extern bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size); +#else +static 
inline bool phys_mem_access_encrypted(unsigned long phys_addr,
+					      unsigned long size)
+{
+	return true;
+}
+#endif
+
+/**
+ * iosubmit_cmds512 - copy data to single MMIO location, in 512-bit units
+ * @dst: destination, in MMIO space (must be 512-bit aligned)
+ * @src: source
+ * @count: number of 512-bit quantities to submit
+ *
+ * Submit data from kernel space to MMIO space, in units of 512 bits at a
+ * time.  Order of access is not guaranteed, nor is a memory barrier
+ * performed afterwards.
+ *
+ * Warning: Do not use this helper unless your driver has checked that the CPU
+ * instruction is supported on the platform.
+ */
+static inline void iosubmit_cmds512(void __iomem *dst, const void *src,
+				    size_t count)
+{
+	const u8 *from = src;
+	const u8 *end = from + count * 64;
+
+	while (from < end) {
+		movdir64b_io(dst, from);
+		from += 64;
+	}
+}
 
 #endif /* _ASM_X86_IO_H */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index fd20a2334885..0d806513c4b3 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -13,15 +13,6 @@
  * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
  */
 
-/* I/O Unit Redirection Table */
-#define IO_APIC_REDIR_VECTOR_MASK	0x000FF
-#define IO_APIC_REDIR_DEST_LOGICAL	0x00800
-#define IO_APIC_REDIR_DEST_PHYSICAL	0x00000
-#define IO_APIC_REDIR_SEND_PENDING	(1 << 12)
-#define IO_APIC_REDIR_REMOTE_IRR	(1 << 14)
-#define IO_APIC_REDIR_LEVEL_TRIGGER	(1 << 15)
-#define IO_APIC_REDIR_MASKED		(1 << 16)
-
 /*
  * The structure of the IO-APIC:
  */
@@ -65,53 +56,40 @@ union IO_APIC_reg_03 {
 };
 
 struct IO_APIC_route_entry {
-	__u32	vector		:  8,
-		delivery_mode	:  3,	/* 000: FIXED
-					 * 001: lowest prio
-					 * 111: ExtINT
-					 */
-		dest_mode	:  1,	/* 0: physical, 1: logical */
-		delivery_status	:  1,
-		polarity	:  1,
-		irr		:  1,
-		trigger		:  1,	/* 0: edge, 1: level */
-		mask		:  1,	/* 0: enabled, 1: disabled */
-		__reserved_2	: 15;
-
-	__u32	__reserved_3	: 24,
-		dest		:  8;
-} __attribute__ ((packed));
-
-struct IR_IO_APIC_route_entry {
-	__u64	vector		: 8,
-		zero		: 3,
-		index2		: 1,
-		delivery_status	: 1,
-		polarity	: 1,
-		irr		: 1,
-		trigger		: 1,
-		mask		: 1,
-		reserved	: 31,
-		format		: 1,
-		index		: 15;
+	union {
+		struct {
+			u64	vector			:  8,
+				delivery_mode		:  3,
+				dest_mode_logical	:  1,
+				delivery_status		:  1,
+				active_low		:  1,
+				irr			:  1,
+				is_level		:  1,
+				masked			:  1,
+				reserved_0		: 15,
+				reserved_1		: 17,
+				virt_destid_8_14	:  7,
+				destid_0_7		:  8;
+		};
+		struct {
+			u64	ir_shared_0		:  8,
+				ir_zero			:  3,
+				ir_index_15		:  1,
+				ir_shared_1		:  5,
+				ir_reserved_0		: 31,
+				ir_format		:  1,
+				ir_index_0_14		: 15;
+		};
+		struct {
+			u64	w1			: 32,
+				w2			: 32;
+		};
+	};
 } __attribute__ ((packed));
 
 struct irq_alloc_info;
 struct ioapic_domain_cfg;
 
-#define	IOAPIC_AUTO			-1
-#define	IOAPIC_EDGE			0
-#define	IOAPIC_LEVEL			1
-
-#define	IOAPIC_MASKED			1
-#define	IOAPIC_UNMASKED			0
-
-#define	IOAPIC_POL_HIGH			0
-#define	IOAPIC_POL_LOW			1
-
-#define	IOAPIC_DEST_MODE_PHYSICAL	0
-#define	IOAPIC_DEST_MODE_LOGICAL	1
-
 #define	IOAPIC_MAP_ALLOC		0x1
 #define	IOAPIC_MAP_CHECK		0x2
@@ -131,8 +109,8 @@ extern int mp_irq_entries;
 /* MP IRQ source entries */
 extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
 
-/* 1 if "noapic" boot option passed */
-extern int skip_ioapic_setup;
+/* True if "noapic" boot option passed */
+extern bool ioapic_is_disabled;
 
 /* 1 if "noapic" boot option passed */
 extern int noioapicquirk;
@@ -151,7 +129,7 @@ extern unsigned long io_apic_irqs;
  * assignment of PCI IRQ's.
*/ #define io_apic_assign_pci_irqs \ - (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) + (mp_irq_entries && !ioapic_is_disabled && io_apic_irqs) struct irq_cfg; extern void ioapic_insert_resources(void); @@ -162,7 +140,6 @@ extern void mask_ioapic_entries(void); extern int restore_ioapic_entries(void); extern void setup_ioapic_ids_from_mpc(void); -extern void setup_ioapic_ids_from_mpc_nocheck(void); extern int mp_find_ioapic(u32 gsi); extern int mp_find_ioapic_pin(int ioapic, u32 gsi); @@ -201,6 +178,7 @@ extern void print_IO_APICs(void); #define IO_APIC_IRQ(x) 0 #define io_apic_assign_pci_irqs 0 #define setup_ioapic_ids_from_mpc x86_init_noop +#define nr_ioapics (0) static inline void ioapic_insert_resources(void) { } static inline int arch_early_ioapic_init(void) { return 0; } static inline void print_IO_APICs(void) {} diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h new file mode 100644 index 000000000000..7f080f5c7def --- /dev/null +++ b/arch/x86/include/asm/io_bitmap.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IOBITMAP_H +#define _ASM_X86_IOBITMAP_H + +#include <linux/refcount.h> +#include <asm/processor.h> + +struct io_bitmap { + u64 sequence; + refcount_t refcnt; + /* The maximum number of bytes to copy so all zero bits are covered */ + unsigned int max; + unsigned long bitmap[IO_BITMAP_LONGS]; +}; + +struct task_struct; + +#ifdef CONFIG_X86_IOPL_IOPERM +void io_bitmap_share(struct task_struct *tsk); +void io_bitmap_exit(struct task_struct *tsk); + +static inline void native_tss_invalidate_io_bitmap(void) +{ + /* + * Invalidate the I/O bitmap by moving io_bitmap_base outside the + * TSS limit so any subsequent I/O access from user space will + * trigger a #GP. + * + * This is correct even when VMEXIT rewrites the TSS limit + * to 0x67 as the only requirement is that the base points + * outside the limit. + */ + this_cpu_write(cpu_tss_rw.x86_tss.io_bitmap_base, + IO_BITMAP_OFFSET_INVALID); +} + +void native_tss_update_io_bitmap(void); + +#ifdef CONFIG_PARAVIRT_XXL +#include <asm/paravirt.h> +#else +#define tss_update_io_bitmap native_tss_update_io_bitmap +#define tss_invalidate_io_bitmap native_tss_invalidate_io_bitmap +#endif + +#else +static inline void io_bitmap_share(struct task_struct *tsk) { } +static inline void io_bitmap_exit(struct task_struct *tsk) { } +static inline void tss_update_io_bitmap(void) { } +#endif + +#endif diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index 363e33eb6ec1..e2de092fc38c 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -1,41 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_IOMAP_H #define _ASM_X86_IOMAP_H /* * Copyright © 2008 Ingo Molnar - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
*/ #include <linux/fs.h> #include <linux/mm.h> #include <linux/uaccess.h> +#include <linux/highmem.h> #include <asm/cacheflush.h> -#include <asm/pgtable.h> #include <asm/tlbflush.h> -void __iomem * -iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); +void __iomem *__iomap_local_pfn_prot(unsigned long pfn, pgprot_t prot); -void -iounmap_atomic(void __iomem *kvaddr); +int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot); -int -iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot); - -void -iomap_free(resource_size_t base, unsigned long size); +void iomap_free(resource_size_t base, unsigned long size); #endif /* _ASM_X86_IOMAP_H */ diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index baedab8ac538..3be2451e7bc8 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -2,11 +2,39 @@ #ifndef _ASM_X86_IOMMU_H #define _ASM_X86_IOMMU_H +#include <linux/acpi.h> + +#include <asm/e820/api.h> + extern int force_iommu, no_iommu; extern int iommu_detected; -extern int iommu_pass_through; +extern int iommu_merge; +extern int panic_on_overflow; +extern bool amd_iommu_snp_en; + +#ifdef CONFIG_SWIOTLB +extern bool x86_swiotlb_enable; +#else +#define x86_swiotlb_enable false +#endif /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) +static inline int __init +arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr) +{ + u64 start = rmrr->base_address; + u64 end = rmrr->end_address + 1; + int entry_type; + + entry_type = e820__get_entry_type(start, end); + if (entry_type == E820_TYPE_RESERVED || entry_type == E820_TYPE_NVS) + return 0; + + pr_err(FW_BUG "No firmware reserved region can cover this RMRR [%#018Lx-%#018Lx], contact BIOS vendor for fixes\n", + start, end - 1); + return -EINVAL; +} + #endif /* _ASM_X86_IOMMU_H */ diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h deleted file mode 100644 index 1fb3fd1a83c2..000000000000 --- a/arch/x86/include/asm/iommu_table.h +++ /dev/null @@ -1,102 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_IOMMU_TABLE_H -#define _ASM_X86_IOMMU_TABLE_H - -#include <asm/swiotlb.h> - -/* - * History lesson: - * The execution chain of IOMMUs in 2.6.36 looks as so: - * - * [xen-swiotlb] - * | - * +----[swiotlb *]--+ - * / | \ - * / | \ - * [GART] [Calgary] [Intel VT-d] - * / - * / - * [AMD-Vi] - * - * *: if SWIOTLB detected 'iommu=soft'/'swiotlb=force' it would skip - * over the rest of IOMMUs and unconditionally initialize the SWIOTLB. - * Also it would surreptitiously initialize set the swiotlb=1 if there were - * more than 4GB and if the user did not pass in 'iommu=off'. The swiotlb - * flag would be turned off by all IOMMUs except the Calgary one. - * - * The IOMMU_INIT* macros allow a similar tree (or more complex if desired) - * to be built by defining who we depend on. - * - * And all that needs to be done is to use one of the macros in the IOMMU - * and the pci-dma.c will take care of the rest. - */ - -struct iommu_table_entry { - initcall_t detect; - initcall_t depend; - void (*early_init)(void); /* No memory allocate available. */ - void (*late_init)(void); /* Yes, can allocate memory. */ -#define IOMMU_FINISH_IF_DETECTED (1<<0) -#define IOMMU_DETECTED (1<<1) - int flags; -}; -/* - * Macro fills out an entry in the .iommu_table that is equivalent - * to the fields that 'struct iommu_table_entry' has. 
The entries - * that are put in the .iommu_table section are not put in any order, - * hence during boot-time we will have to re-sort them based on their - * dependency. */ - - -#define __IOMMU_INIT(_detect, _depend, _early_init, _late_init, _finish)\ - static const struct iommu_table_entry \ - __iommu_entry_##_detect __used \ - __attribute__ ((unused, __section__(".iommu_table"), \ - aligned((sizeof(void *))))) \ - = {_detect, _depend, _early_init, _late_init, \ - _finish ? IOMMU_FINISH_IF_DETECTED : 0} -/* - * The simplest IOMMU definition. Provide the detection routine - * and it will be run after the SWIOTLB and the other IOMMUs - * that utilize this macro. If the IOMMU is detected (i.e., the - * detect routine returns a positive value), the other IOMMUs - * are also checked. You can use IOMMU_INIT_POST_FINISH if you prefer - * to stop detecting the other IOMMUs after yours has been detected. - */ -#define IOMMU_INIT_POST(_detect) \ - __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 0) - -#define IOMMU_INIT_POST_FINISH(_detect) \ - __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 1) - -/* - * A more sophisticated version of IOMMU_INIT. This variant requires: - * a). A detection routine function. - * b). The name of the detection routine we depend on to get called - * before us. - * c). The init routine, which gets called from pci_iommu_alloc if the - * detection routine returns a positive value; at that point no - * memory allocator is available yet. - * d). Similar to the 'init', except that this gets called from pci_iommu_init - * where we do have a memory allocator. - * - * The standard IOMMU_INIT differs from the IOMMU_INIT_FINISH variant - * in that the former will continue detecting other IOMMUs in the call - * list after the detection routine returns a positive number, while the - * latter will stop the execution chain upon first successful detection. - * Both variants will still call the 'init' and 'late_init' functions if - * they are set. - */ -#define IOMMU_INIT_FINISH(_detect, _depend, _init, _late_init) \ - __IOMMU_INIT(_detect, _depend, _init, _late_init, 1) - -#define IOMMU_INIT(_detect, _depend, _init, _late_init) \ - __IOMMU_INIT(_detect, _depend, _init, _late_init, 0) - -void sort_iommu_table(struct iommu_table_entry *start, - struct iommu_table_entry *finish); - -void check_iommu_entries(struct iommu_table_entry *start, - struct iommu_table_entry *finish); - -#endif /* _ASM_X86_IOMMU_TABLE_H */ diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h index 5270ff39b9af..8ace6559d399 100644 --- a/arch/x86/include/asm/iosf_mbi.h +++ b/arch/x86/include/asm/iosf_mbi.h @@ -39,6 +39,7 @@ #define BT_MBI_UNIT_PMC 0x04 #define BT_MBI_UNIT_GFX 0x06 #define BT_MBI_UNIT_SMI 0x0C +#define BT_MBI_UNIT_CCK 0x14 #define BT_MBI_UNIT_USB 0x43 #define BT_MBI_UNIT_SATA 0xA3 #define BT_MBI_UNIT_PCIE 0xA6 @@ -110,7 +111,7 @@ int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask); * This function will block all kernel access to the PMIC I2C bus, so that the * P-Unit can safely access the PMIC over the shared I2C bus. * - * Note on these systems the i2c-bus driver will request a sempahore from the + * Note on these systems the i2c-bus driver will request a semaphore from the * P-Unit for exclusive access to the PMIC bus when i2c drivers are accessing * it, but this does not appear to be sufficient; we still need to avoid making * certain P-Unit requests during the access window to avoid problems.
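(To make the access-window dance just described concrete: a PMIC-sharing driver can subscribe to P-Unit bus ownership changes through the notifier hook declared in the next hunk. A minimal sketch of mine, not from this patch; the MBI_PMIC_BUS_ACCESS_BEGIN/END notifier actions are assumed from elsewhere in this header:)

	#include <linux/notifier.h>
	#include <asm/iosf_mbi.h>

	static int pmic_bus_notify(struct notifier_block *nb,
				   unsigned long action, void *data)
	{
		switch (action) {
		case MBI_PMIC_BUS_ACCESS_BEGIN:
			/* The P-Unit is about to use the PMIC I2C bus: back off. */
			break;
		case MBI_PMIC_BUS_ACCESS_END:
			/* Window closed: our own PMIC transfers are safe again. */
			break;
		}
		return NOTIFY_OK;
	}

	static struct notifier_block pmic_bus_nb = {
		.notifier_call = pmic_bus_notify,
	};

	/* Registered once, e.g. from the driver's probe() path:
	 *	iosf_mbi_register_pmic_bus_access_notifier(&pmic_bus_nb);
	 */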
@@ -167,13 +168,6 @@ void iosf_mbi_unblock_punit_i2c_access(void); int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb); /** - * iosf_mbi_register_pmic_bus_access_notifier - Unregister PMIC bus notifier - * - * @nb: notifier_block to unregister - */ -int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb); - -/** * iosf_mbi_unregister_pmic_bus_access_notifier_unlocked - Unregister PMIC bus * notifier, unlocked * diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h deleted file mode 100644 index a4fe16e42b7b..000000000000 --- a/arch/x86/include/asm/ipi.h +++ /dev/null @@ -1,110 +0,0 @@ -#ifndef _ASM_X86_IPI_H -#define _ASM_X86_IPI_H - -#ifdef CONFIG_X86_LOCAL_APIC - -/* - * Copyright 2004 James Cleverdon, IBM. - * Subject to the GNU Public License, v.2 - * - * Generic APIC InterProcessor Interrupt code. - * - * Moved to include file by James Cleverdon from - * arch/x86-64/kernel/smp.c - * - * Copyrights from kernel/smp.c: - * - * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> - * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> - * (c) 2002,2003 Andi Kleen, SuSE Labs. - * Subject to the GNU Public License, v.2 - */ - -#include <asm/hw_irq.h> -#include <asm/apic.h> -#include <asm/smp.h> - -/* - * the following functions deal with sending IPIs between CPUs. - * - * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. - */ - -static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector, - unsigned int dest) -{ - unsigned int icr = shortcut | dest; - - switch (vector) { - default: - icr |= APIC_DM_FIXED | vector; - break; - case NMI_VECTOR: - icr |= APIC_DM_NMI; - break; - } - return icr; -} - -static inline int __prepare_ICR2(unsigned int mask) -{ - return SET_APIC_DEST_FIELD(mask); -} - -static inline void __xapic_wait_icr_idle(void) -{ - while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY) - cpu_relax(); -} - -void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest); - -/* - * This is used to send an IPI with no shorthand notation (the destination is - * specified in bits 56 to 63 of the ICR). 
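(A worked illustration of __prepare_ICR() from the header being deleted here, mine rather than the file's: the ICR command word is simply shortcut | destination | delivery mode | vector, and NMI_VECTOR swaps the delivery mode for APIC_DM_NMI:)

	/* Fixed delivery of vector 0x30 via the all-but-self shorthand: */
	unsigned int icr = __prepare_ICR(APIC_DEST_ALLBUT, 0x30, 0);
	/* == APIC_DEST_ALLBUT | APIC_DM_FIXED | 0x30 */

	/* An NMI IPI carries no vector number, only the delivery mode: */
	unsigned int nmi = __prepare_ICR(0, NMI_VECTOR, 0);
	/* == APIC_DM_NMI */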
- */ -void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest); - -extern void default_send_IPI_single(int cpu, int vector); -extern void default_send_IPI_single_phys(int cpu, int vector); -extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, - int vector); -extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, - int vector); - -/* Avoid include hell */ -#define NMI_VECTOR 0x02 - -extern int no_broadcast; - -static inline void __default_local_send_IPI_allbutself(int vector) -{ - if (no_broadcast || vector == NMI_VECTOR) - apic->send_IPI_mask_allbutself(cpu_online_mask, vector); - else - __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical); -} - -static inline void __default_local_send_IPI_all(int vector) -{ - if (no_broadcast || vector == NMI_VECTOR) - apic->send_IPI_mask(cpu_online_mask, vector); - else - __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical); -} - -#ifdef CONFIG_X86_32 -extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, - int vector); -extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, - int vector); -extern void default_send_IPI_mask_logical(const struct cpumask *mask, - int vector); -extern void default_send_IPI_allbutself(int vector); -extern void default_send_IPI_all(int vector); -extern void default_send_IPI_self(int vector); -#endif - -#endif - -#endif /* _ASM_X86_IPI_H */ diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index fbb16e6b6c18..194dfff84cb1 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -11,46 +11,39 @@ #include <asm/apicdef.h> #include <asm/irq_vectors.h> +/* + * The irq entry code is in the noinstr section and the start/end of + * __irqentry_text is emitted via labels. Make the build fail if + * something moves a C function into the __irq_entry section. + */ +#define __irq_entry __invalid_section + static inline int irq_canonicalize(int irq) { return ((irq == 2) ? 
9 : irq); } -#ifdef CONFIG_X86_32 -extern void irq_ctx_init(int cpu); -#else -# define irq_ctx_init(cpu) do { } while (0) -#endif - -#define __ARCH_HAS_DO_SOFTIRQ +extern int irq_init_percpu_irqstack(unsigned int cpu); struct irq_desc; extern void fixup_irqs(void); -#ifdef CONFIG_HAVE_KVM +#if IS_ENABLED(CONFIG_KVM) extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)); -extern __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs); -extern __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs); -extern __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs); #endif extern void (*x86_platform_ipi_callback)(void); extern void native_init_IRQ(void); -extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs); - -extern __visible unsigned int do_IRQ(struct pt_regs *regs); +extern void __handle_irq(struct irq_desc *desc, struct pt_regs *regs); extern void init_ISA_irqs(void); -extern void __init init_IRQ(void); - #ifdef CONFIG_X86_LOCAL_APIC void arch_trigger_cpumask_backtrace(const struct cpumask *mask, - bool exclude_self); + int exclude_cpu); -extern __visible void smp_x86_platform_ipi(struct pt_regs *regs); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace #endif diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h deleted file mode 100644 index 8f3bee821e6c..000000000000 --- a/arch/x86/include/asm/irq_regs.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Per-cpu current frame pointer - the location of the last exception frame on - * the stack, stored in the per-cpu area. - * - * Jeremy Fitzhardinge <jeremy@goop.org> - */ -#ifndef _ASM_X86_IRQ_REGS_H -#define _ASM_X86_IRQ_REGS_H - -#include <asm/percpu.h> - -#define ARCH_HAS_OWN_IRQ_REGS - -DECLARE_PER_CPU(struct pt_regs *, irq_regs); - -static inline struct pt_regs *get_irq_regs(void) -{ - return this_cpu_read(irq_regs); -} - -static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) -{ - struct pt_regs *old_regs; - - old_regs = get_irq_regs(); - this_cpu_write(irq_regs, new_regs); - - return old_regs; -} - -#endif /* _ASM_X86_IRQ_REGS_32_H */ diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 67ed72f31cc2..5a0d42464d44 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -1,20 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2012 Advanced Micro Devices, Inc. * Author: Joerg Roedel <joerg.roedel@amd.com> * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * * This header file contains the interface of the interrupt remapping code to * the x86 interrupt management code. 
*/ @@ -38,7 +26,22 @@ enum { IRQ_REMAP_X2APIC_MODE, }; -struct vcpu_data { +/* + * This is mainly used to communicate information back-and-forth + * between SVM and IOMMU for setting up and tearing down posted + * interrupt + */ +struct amd_iommu_pi_data { + u64 vapic_addr; /* Physical address of the vCPU's vAPIC. */ + u32 ga_tag; + u32 vector; /* Guest vector of the interrupt */ + int cpu; + bool ga_log_intr; + bool is_guest_mode; + void *ir_data; +}; + +struct intel_iommu_pi_data { u64 pi_desc_addr; /* Physical address of PI Descriptor */ u32 vector; /* Guest vector of the interrupt */ }; @@ -56,21 +59,19 @@ extern int irq_remapping_reenable(int); extern int irq_remap_enable_fault_handling(void); extern void panic_if_irq_remap(const char *msg); -extern struct irq_domain * -irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info); -extern struct irq_domain * -irq_remapping_get_irq_domain(struct irq_alloc_info *info); - -/* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */ -extern struct irq_domain * -arch_create_remap_msi_irq_domain(struct irq_domain *par, const char *n, int id); - /* Get parent irqdomain for interrupt remapping irqdomain */ static inline struct irq_domain *arch_get_ir_parent_domain(void) { return x86_vector_domain; } +extern bool enable_posted_msi; + +static inline bool posted_msi_supported(void) +{ + return enable_posted_msi && irq_remapping_cap(IRQ_POSTING_CAP); +} + #else /* CONFIG_IRQ_REMAP */ static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; } @@ -85,17 +86,5 @@ static inline void panic_if_irq_remap(const char *msg) { } -static inline struct irq_domain * -irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info) -{ - return NULL; -} - -static inline struct irq_domain * -irq_remapping_get_irq_domain(struct irq_alloc_info *info) -{ - return NULL; -} - #endif /* CONFIG_IRQ_REMAP */ #endif /* __X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h new file mode 100644 index 000000000000..8325b79f2ac6 --- /dev/null +++ b/arch/x86/include/asm/irq_stack.h @@ -0,0 +1,241 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IRQ_STACK_H +#define _ASM_X86_IRQ_STACK_H + +#include <linux/ptrace.h> +#include <linux/objtool.h> + +#include <asm/processor.h> + +#ifdef CONFIG_X86_64 + +/* + * Macro to inline switching to an interrupt stack and invoking function + * calls from there. The following rules apply: + * + * - Ordering: + * + * 1. Write the stack pointer into the top most place of the irq + * stack. This ensures that the various unwinders can link back to the + * original stack. + * + * 2. Switch the stack pointer to the top of the irq stack. + * + * 3. Invoke whatever needs to be done (@asm_call argument) + * + * 4. Pop the original stack pointer from the top of the irq stack + * which brings it back to the original stack where it left off. + * + * - Function invocation: + * + * To allow flexible usage of the macro, the actual function code including + * the store of the arguments in the call ABI registers is handed in via + * the @asm_call argument. + * + * - Local variables: + * + * @tos: + * The @tos variable holds a pointer to the top of the irq stack and + * _must_ be allocated in a non-callee saved register as this is a + * restriction coming from objtool. 
+ * + * Note, that (tos) is both in input and output constraints to ensure + * that the compiler does not assume that R11 is left untouched in + * case this macro is used in some place where the per cpu interrupt + * stack pointer is used again afterwards + * + * - Function arguments: + * The function argument(s), if any, have to be defined in register + * variables at the place where this is invoked. Storing the + * argument(s) in the proper register(s) is part of the @asm_call + * + * - Constraints: + * + * The constraints have to be done very carefully because the compiler + * does not know about the assembly call. + * + * output: + * As documented already above the @tos variable is required to be in + * the output constraints to make the compiler aware that R11 cannot be + * reused after the asm() statement. + * + * For builds with CONFIG_UNWINDER_FRAME_POINTER, ASM_CALL_CONSTRAINT is + * required as well as this prevents certain creative GCC variants from + * misplacing the ASM code. + * + * input: + * - func: + * Immediate, which tells the compiler that the function is referenced. + * + * - tos: + * Register. The actual register is defined by the variable declaration. + * + * - function arguments: + * The constraints are handed in via the 'argconstr' argument list. They + * describe the register arguments which are used in @asm_call. + * + * clobbers: + * Function calls can clobber anything except the callee-saved + * registers. Tell the compiler. + */ +#define call_on_stack(stack, func, asm_call, argconstr...) \ +{ \ + register void *tos asm("r11"); \ + \ + tos = ((void *)(stack)); \ + \ + asm_inline volatile( \ + "movq %%rsp, (%[tos]) \n" \ + "movq %[tos], %%rsp \n" \ + \ + asm_call \ + \ + "popq %%rsp \n" \ + \ + : "+r" (tos), ASM_CALL_CONSTRAINT \ + : [__func] "i" (func), [tos] "r" (tos) argconstr \ + : "cc", "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", \ + "memory" \ + ); \ +} + +#define ASM_CALL_ARG0 \ + "1: call %c[__func] \n" \ + ANNOTATE_REACHABLE(1b) " \n" + +#define ASM_CALL_ARG1 \ + "movq %[arg1], %%rdi \n" \ + ASM_CALL_ARG0 + +#define ASM_CALL_ARG2 \ + "movq %[arg2], %%rsi \n" \ + ASM_CALL_ARG1 + +#define ASM_CALL_ARG3 \ + "movq %[arg3], %%rdx \n" \ + ASM_CALL_ARG2 + +#define call_on_irqstack(func, asm_call, argconstr...) \ + call_on_stack(__this_cpu_read(hardirq_stack_ptr), \ + func, asm_call, argconstr) + +/* Macros to assert type correctness for run_*_on_irqstack macros */ +#define assert_function_type(func, proto) \ + static_assert(__builtin_types_compatible_p(typeof(&func), proto)) + +#define assert_arg_type(arg, proto) \ + static_assert(__builtin_types_compatible_p(typeof(arg), proto)) + +/* + * Macro to invoke system vector and device interrupt C handlers. + */ +#define call_on_irqstack_cond(func, regs, asm_call, constr, c_args...) \ +{ \ + /* \ + * User mode entry and interrupt on the irq stack do not \ + * switch stacks. If from user mode the task stack is empty. \ + */ \ + if (user_mode(regs) || __this_cpu_read(hardirq_stack_inuse)) { \ + irq_enter_rcu(); \ + func(c_args); \ + irq_exit_rcu(); \ + } else { \ + /* \ + * Mark the irq stack inuse _before_ and unmark _after_ \ + * switching stacks. Interrupts are disabled in both \ + * places. Invoke the stack switch macro with the call \ + * sequence which matches the above direct invocation. 
\ + */ \ + __this_cpu_write(hardirq_stack_inuse, true); \ + call_on_irqstack(func, asm_call, constr); \ + __this_cpu_write(hardirq_stack_inuse, false); \ + } \ +} + +/* + * Function call sequence for __call_on_irqstack() for system vectors. + * + * Note that irq_enter_rcu() and irq_exit_rcu() do not use the input + * mechanism because these functions are global and cannot be optimized out + * when compiling a particular source file which uses one of these macros. + * + * The argument (regs) does not need to be pushed or stashed in a callee + * saved register to be safe vs. the irq_enter_rcu() call because the + * clobbers already prevent the compiler from storing it in a callee + * clobbered register. As the compiler has to preserve @regs for the final + * call to idtentry_exit() anyway, it's likely that it does not cause extra + * effort for this asm magic. + */ +#define ASM_CALL_SYSVEC \ + "call irq_enter_rcu \n" \ + ASM_CALL_ARG1 \ + "call irq_exit_rcu \n" + +#define SYSVEC_CONSTRAINTS , [arg1] "r" (regs) + +#define run_sysvec_on_irqstack_cond(func, regs) \ +{ \ + assert_function_type(func, void (*)(struct pt_regs *)); \ + assert_arg_type(regs, struct pt_regs *); \ + \ + call_on_irqstack_cond(func, regs, ASM_CALL_SYSVEC, \ + SYSVEC_CONSTRAINTS, regs); \ +} + +/* + * As in ASM_CALL_SYSVEC above the clobbers force the compiler to store + * @regs and @vector in callee saved registers. + */ +#define ASM_CALL_IRQ \ + "call irq_enter_rcu \n" \ + ASM_CALL_ARG2 \ + "call irq_exit_rcu \n" + +#define IRQ_CONSTRAINTS , [arg1] "r" (regs), [arg2] "r" ((unsigned long)vector) + +#define run_irq_on_irqstack_cond(func, regs, vector) \ +{ \ + assert_function_type(func, void (*)(struct pt_regs *, u32)); \ + assert_arg_type(regs, struct pt_regs *); \ + assert_arg_type(vector, u32); \ + \ + call_on_irqstack_cond(func, regs, ASM_CALL_IRQ, \ + IRQ_CONSTRAINTS, regs, vector); \ +} + +#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK +/* + * Macro to invoke __do_softirq on the irq stack. This is only called from + * task context when bottom halves are about to be reenabled and soft + * interrupts are pending to be processed. The interrupt stack cannot be in + * use here. + */ +#define do_softirq_own_stack() \ +{ \ + __this_cpu_write(hardirq_stack_inuse, true); \ + call_on_irqstack(__do_softirq, ASM_CALL_ARG0); \ + __this_cpu_write(hardirq_stack_inuse, false); \ +} + +#endif + +#else /* CONFIG_X86_64 */ +/* System vector handlers always run on the stack they interrupted. */ +#define run_sysvec_on_irqstack_cond(func, regs) \ +{ \ + irq_enter_rcu(); \ + func(regs); \ + irq_exit_rcu(); \ +} + +/* Switches to the irq stack within func() */ +#define run_irq_on_irqstack_cond(func, regs, vector) \ +{ \ + irq_enter_rcu(); \ + func(regs, vector); \ + irq_exit_rcu(); \ +} + +#endif /* !CONFIG_X86_64 */ + +#endif diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 548d90bbf919..47051871b436 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -18,16 +18,16 @@ * Vectors 0 ... 31 : system traps and exceptions - hardcoded events * Vectors 32 ... 127 : device interrupts * Vector 128 : legacy int80 syscall interface - * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts - * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts + * Vectors 129 ... FIRST_SYSTEM_VECTOR-1 : device interrupts + * Vectors FIRST_SYSTEM_VECTOR ... 255 : special interrupts * * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. 
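(An aside on the arithmetic this layout implies; the NR_* helper macros appear at the end of this hunk. A throwaway sanity check of mine, using the values this patch settles on for CONFIG_X86_LOCAL_APIC builds:)

	#include <stdio.h>

	#define FIRST_EXTERNAL_VECTOR		0x20	/* 32 */
	#define POSTED_MSI_NOTIFICATION_VECTOR	0xeb	/* 235 */
	#define FIRST_SYSTEM_VECTOR	POSTED_MSI_NOTIFICATION_VECTOR
	#define NR_VECTORS			256

	int main(void)
	{
		/* 235 - 32 = 203 vectors assignable to device interrupts */
		printf("NR_EXTERNAL_VECTORS = %d\n",
		       FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR);
		/* 256 - 235 = 21 vectors reserved as system vectors */
		printf("NR_SYSTEM_VECTORS   = %d\n",
		       NR_VECTORS - FIRST_SYSTEM_VECTOR);
		return 0;
	}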
* * This file enumerates the exact layout of them: */ +/* This is used as an interrupt vector when programming the APIC. */ #define NMI_VECTOR 0x02 -#define MCE_VECTOR 0x12 /* * IDT vectors usable for external interrupt sources start at 0x20. @@ -35,13 +35,6 @@ */ #define FIRST_EXTERNAL_VECTOR 0x20 -/* - * Reserve the lowest usable vector (and hence lowest priority) 0x20 for - * triggering cleanup after irq migration. 0x21-0x2f will still be used - * for device interrupts. - */ -#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR - #define IA32_SYSCALL_VECTOR 0x80 /* @@ -84,18 +77,16 @@ */ #define IRQ_WORK_VECTOR 0xf6 -#define UV_BAU_MESSAGE 0xf5 +/* 0xf5 - unused, was UV_BAU_MESSAGE */ #define DEFERRED_ERROR_VECTOR 0xf4 /* Vector on which hypervisor callbacks will be delivered */ #define HYPERVISOR_CALLBACK_VECTOR 0xf3 /* Vector for KVM to deliver posted interrupt IPI */ -#ifdef CONFIG_HAVE_KVM #define POSTED_INTR_VECTOR 0xf2 #define POSTED_INTR_WAKEUP_VECTOR 0xf1 #define POSTED_INTR_NESTED_VECTOR 0xf0 -#endif #define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef @@ -106,14 +97,23 @@ #define LOCAL_TIMER_VECTOR 0xec +/* + * Posted interrupt notification vector for all device MSIs delivered to + * the host kernel. + */ +#define POSTED_MSI_NOTIFICATION_VECTOR 0xeb + #define NR_VECTORS 256 #ifdef CONFIG_X86_LOCAL_APIC -#define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR +#define FIRST_SYSTEM_VECTOR POSTED_MSI_NOTIFICATION_VECTOR #else #define FIRST_SYSTEM_VECTOR NR_VECTORS #endif +#define NR_EXTERNAL_VECTORS (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) +#define NR_SYSTEM_VECTORS (NR_VECTORS - FIRST_SYSTEM_VECTOR) + /* * Size the maximum number of interrupts. * diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h index 80b35e3adf03..6b4d36c95165 100644 --- a/arch/x86/include/asm/irq_work.h +++ b/arch/x86/include/asm/irq_work.h @@ -9,8 +9,6 @@ static inline bool arch_irq_work_has_interrupt(void) { return boot_cpu_has(X86_FEATURE_APIC); } -extern void arch_irq_work_raise(void); -extern __visible void smp_irq_work_interrupt(struct pt_regs *regs); #else static inline bool arch_irq_work_has_interrupt(void) { diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h index c066ffae222b..30c325c235c0 100644 --- a/arch/x86/include/asm/irqdomain.h +++ b/arch/x86/include/asm/irqdomain.h @@ -7,11 +7,12 @@ #ifdef CONFIG_X86_LOCAL_APIC enum { - /* Allocate contiguous CPU vectors */ - X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1, - X86_IRQ_ALLOC_LEGACY = 0x2, + X86_IRQ_ALLOC_LEGACY = 0x1, }; +extern int x86_fwspec_is_ioapic(struct irq_fwspec *fwspec); +extern int x86_fwspec_is_hpet(struct irq_fwspec *fwspec); + extern struct irq_domain *x86_vector_domain; extern void init_irq_alloc_info(struct irq_alloc_info *info, @@ -51,9 +52,13 @@ extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); #endif /* CONFIG_X86_IO_APIC */ #ifdef CONFIG_PCI_MSI -extern void arch_init_msi_domain(struct irq_domain *domain); +void x86_create_pci_msi_domain(void); +struct irq_domain *native_create_pci_msi_domain(void); +extern struct irq_domain *x86_pci_msi_default_domain; #else -static inline void arch_init_msi_domain(struct irq_domain *domain) { } +static inline void x86_create_pci_msi_domain(void) { } +#define native_create_pci_msi_domain NULL +#define x86_pci_msi_default_domain NULL #endif #endif diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 058e40fed167..b30e5474c18e 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ 
-4,10 +4,9 @@ #include <asm/processor-flags.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ -/* Provide __cpuidle; we can't safely include <linux/cpu.h> */ -#define __cpuidle __attribute__((__section__(".cpuidle.text"))) +#include <asm/nospec-branch.h> /* * Interrupt control: @@ -15,7 +14,7 @@ /* Declaration required for gcc < 4.9 to prevent -Werror=missing-prototypes */ extern inline unsigned long native_save_fl(void); -extern inline unsigned long native_save_fl(void) +extern __always_inline unsigned long native_save_fl(void) { unsigned long flags; @@ -33,68 +32,57 @@ extern inline unsigned long native_save_fl(void) return flags; } -extern inline void native_restore_fl(unsigned long flags); -extern inline void native_restore_fl(unsigned long flags) -{ - asm volatile("push %0 ; popf" - : /* no output */ - :"g" (flags) - :"memory", "cc"); -} - -static inline void native_irq_disable(void) +static __always_inline void native_irq_disable(void) { asm volatile("cli": : :"memory"); } -static inline void native_irq_enable(void) +static __always_inline void native_irq_enable(void) { asm volatile("sti": : :"memory"); } -static inline __cpuidle void native_safe_halt(void) +static __always_inline void native_safe_halt(void) { + x86_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } -static inline __cpuidle void native_halt(void) +static __always_inline void native_halt(void) { + x86_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } -#endif - -#ifdef CONFIG_PARAVIRT_XXL -#include <asm/paravirt.h> -#else -#ifndef __ASSEMBLY__ -#include <linux/types.h> - -static inline notrace unsigned long arch_local_save_flags(void) +static __always_inline int native_irqs_disabled_flags(unsigned long flags) { - return native_save_fl(); + return !(flags & X86_EFLAGS_IF); } -static inline notrace void arch_local_irq_restore(unsigned long flags) +static __always_inline unsigned long native_local_irq_save(void) { - native_restore_fl(flags); -} + unsigned long flags = native_save_fl(); -static inline notrace void arch_local_irq_disable(void) -{ native_irq_disable(); + + return flags; } -static inline notrace void arch_local_irq_enable(void) +static __always_inline void native_local_irq_restore(unsigned long flags) { - native_irq_enable(); + if (!native_irqs_disabled_flags(flags)) + native_irq_enable(); } +#endif + +#ifndef CONFIG_PARAVIRT +#ifndef __ASSEMBLY__ /* * Used in the idle loop; sti takes one instruction cycle * to complete: */ -static inline __cpuidle void arch_safe_halt(void) +static __always_inline void arch_safe_halt(void) { native_safe_halt(); } @@ -103,15 +91,38 @@ static inline __cpuidle void arch_safe_halt(void) * Used when interrupts are already enabled or to * shutdown the processor: */ -static inline __cpuidle void halt(void) +static __always_inline void halt(void) { native_halt(); } +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_PARAVIRT */ + +#ifdef CONFIG_PARAVIRT_XXL +#include <asm/paravirt.h> +#else +#ifndef __ASSEMBLER__ +#include <linux/types.h> + +static __always_inline unsigned long arch_local_save_flags(void) +{ + return native_save_fl(); +} + +static __always_inline void arch_local_irq_disable(void) +{ + native_irq_disable(); +} + +static __always_inline void arch_local_irq_enable(void) +{ + native_irq_enable(); +} /* * For spinlocks, etc: */ -static inline notrace unsigned long arch_local_irq_save(void) +static __always_inline unsigned long arch_local_irq_save(void) { unsigned long flags = arch_local_save_flags(); arch_local_irq_disable(); @@ -119,87 +130,34 @@ 
static inline notrace unsigned long arch_local_irq_save(void) } #else -#define ENABLE_INTERRUPTS(x) sti -#define DISABLE_INTERRUPTS(x) cli - #ifdef CONFIG_X86_64 #ifdef CONFIG_DEBUG_ENTRY -#define SAVE_FLAGS(x) pushfq; popq %rax +#define SAVE_FLAGS pushfq; popq %rax #endif -#define SWAPGS swapgs -/* - * Currently paravirt can't handle swapgs nicely when we - * don't have a stack we can rely on (such as a user space - * stack). So we either find a way around these or just fault - * and emulate if a guest tries to call swapgs directly. - * - * Either way, this is a good way to document that we don't - * have a reliable stack. x86_64 only. - */ -#define SWAPGS_UNSAFE_STACK swapgs - -#define INTERRUPT_RETURN jmp native_iret -#define USERGS_SYSRET64 \ - swapgs; \ - sysretq; -#define USERGS_SYSRET32 \ - swapgs; \ - sysretl - -#else -#define INTERRUPT_RETURN iret #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_PARAVIRT_XXL */ -#ifndef __ASSEMBLY__ -static inline int arch_irqs_disabled_flags(unsigned long flags) +#ifndef __ASSEMBLER__ +static __always_inline int arch_irqs_disabled_flags(unsigned long flags) { return !(flags & X86_EFLAGS_IF); } -static inline int arch_irqs_disabled(void) +static __always_inline int arch_irqs_disabled(void) { unsigned long flags = arch_local_save_flags(); return arch_irqs_disabled_flags(flags); } -#endif /* !__ASSEMBLY__ */ -#ifdef __ASSEMBLY__ -#ifdef CONFIG_TRACE_IRQFLAGS -# define TRACE_IRQS_ON call trace_hardirqs_on_thunk; -# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk; -#else -# define TRACE_IRQS_ON -# define TRACE_IRQS_OFF -#endif -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# ifdef CONFIG_X86_64 -# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk -# define LOCKDEP_SYS_EXIT_IRQ \ - TRACE_IRQS_ON; \ - sti; \ - call lockdep_sys_exit_thunk; \ - cli; \ - TRACE_IRQS_OFF; -# else -# define LOCKDEP_SYS_EXIT \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call lockdep_sys_exit; \ - popl %edx; \ - popl %ecx; \ - popl %eax; -# define LOCKDEP_SYS_EXIT_IRQ -# endif -#else -# define LOCKDEP_SYS_EXIT -# define LOCKDEP_SYS_EXIT_IRQ -#endif -#endif /* __ASSEMBLY__ */ +static __always_inline void arch_local_irq_restore(unsigned long flags) +{ + if (!arch_irqs_disabled_flags(flags)) + arch_local_irq_enable(); +} +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/x86/include/asm/ist.h b/arch/x86/include/asm/ist.h index c9803f1a2033..7ede2731dc92 100644 --- a/arch/x86/include/asm/ist.h +++ b/arch/x86/include/asm/ist.h @@ -1,16 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Include file for the interface to IST BIOS * Copyright 2002 Andy Grover <andrew.grover@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
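(Stepping back to irqflags.h just above: the arch_local_irq_save()/arch_local_irq_restore() pair rebuilt there is what the generic local_irq_save()/local_irq_restore() helpers bottom out in. A minimal sketch of the canonical usage, mine for illustration:)

	unsigned long flags;

	local_irq_save(flags);		/* save RFLAGS.IF, then cli */
	/* ... short per-CPU critical section, no interrupts here ... */
	local_irq_restore(flags);	/* sti only if IF was set on entry */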
*/ #ifndef _ASM_X86_IST_H #define _ASM_X86_IST_H diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 65191ce8e1cf..05b16299588d 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -2,31 +2,39 @@ #ifndef _ASM_X86_JUMP_LABEL_H #define _ASM_X86_JUMP_LABEL_H -#define JUMP_LABEL_NOP_SIZE 5 - -#ifdef CONFIG_X86_64 -# define STATIC_KEY_INIT_NOP P6_NOP5_ATOMIC -#else -# define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC -#endif +#define HAVE_JUMP_LABEL_BATCH #include <asm/asm.h> #include <asm/nops.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/stringify.h> #include <linux/types.h> -static __always_inline bool arch_static_branch(struct static_key *key, bool branch) +#define JUMP_TABLE_ENTRY(key, label) \ + ".pushsection __jump_table, \"aw\" \n\t" \ + _ASM_ALIGN "\n\t" \ + ANNOTATE_DATA_SPECIAL "\n" \ + ".long 1b - . \n\t" \ + ".long " label " - . \n\t" \ + _ASM_PTR " " key " - . \n\t" \ + ".popsection \n\t" + +/* This macro is also expanded on the Rust side. */ +#ifdef CONFIG_HAVE_JUMP_LABEL_HACK +#define ARCH_STATIC_BRANCH_ASM(key, label) \ + "1: jmp " label " # objtool NOPs this \n\t" \ + JUMP_TABLE_ENTRY(key " + 2", label) +#else /* !CONFIG_HAVE_JUMP_LABEL_HACK */ +#define ARCH_STATIC_BRANCH_ASM(key, label) \ + "1: .byte " __stringify(BYTES_NOP5) "\n\t" \ + JUMP_TABLE_ENTRY(key, label) +#endif /* CONFIG_HAVE_JUMP_LABEL_HACK */ + +static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { - asm_volatile_goto("1:" - ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t" - ".pushsection __jump_table, \"aw\" \n\t" - _ASM_ALIGN "\n\t" - ".long 1b - ., %l[l_yes] - . \n\t" - _ASM_PTR "%c0 + %c1 - .\n\t" - ".popsection \n\t" + asm goto(ARCH_STATIC_BRANCH_ASM("%c0 + %c1", "%l[l_yes]") : : "i" (key), "i" (branch) : : l_yes); return false; @@ -34,16 +42,11 @@ l_yes: return true; } -static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { - asm_volatile_goto("1:" - ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t" - "2:\n\t" - ".pushsection __jump_table, \"aw\" \n\t" - _ASM_ALIGN "\n\t" - ".long 1b - ., %l[l_yes] - . \n\t" - _ASM_PTR "%c0 + %c1 - .\n\t" - ".popsection \n\t" + asm goto("1:" + "jmp %l[l_yes]\n\t" + JUMP_TABLE_ENTRY("%c0 + %c1", "%l[l_yes]") : : "i" (key), "i" (branch) : : l_yes); return false; @@ -51,42 +54,8 @@ l_yes: return true; } -#else /* __ASSEMBLY__ */ - -.macro STATIC_JUMP_IF_TRUE target, key, def -.Lstatic_jump_\@: - .if \def - /* Equivalent to "jmp.d32 \target" */ - .byte 0xe9 - .long \target - .Lstatic_jump_after_\@ -.Lstatic_jump_after_\@: - .else - .byte STATIC_KEY_INIT_NOP - .endif - .pushsection __jump_table, "aw" - _ASM_ALIGN - .long .Lstatic_jump_\@ - ., \target - . - _ASM_PTR \key - . - .popsection -.endm - -.macro STATIC_JUMP_IF_FALSE target, key, def -.Lstatic_jump_\@: - .if \def - .byte STATIC_KEY_INIT_NOP - .else - /* Equivalent to "jmp.d32 \target" */ - .byte 0xe9 - .long \target - .Lstatic_jump_after_\@ -.Lstatic_jump_after_\@: - .endif - .pushsection __jump_table, "aw" - _ASM_ALIGN - .long .Lstatic_jump_\@ - ., \target - . - _ASM_PTR \key + 1 - . 
- .popsection -.endm +extern int arch_jump_entry_size(struct jump_entry *entry); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h index 13e70da38bed..d7e33c7f096b 100644 --- a/arch/x86/include/asm/kasan.h +++ b/arch/x86/include/asm/kasan.h @@ -23,14 +23,17 @@ (1ULL << (__VIRTUAL_MASK_SHIFT - \ KASAN_SHADOW_SCALE_SHIFT))) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_KASAN void __init kasan_early_init(void); void __init kasan_init(void); +void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid); #else static inline void kasan_early_init(void) { } static inline void kasan_init(void) { } +static inline void kasan_populate_shadow_for_vaddr(void *va, size_t size, + int nid) { } #endif #endif diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h index db7ba2feb947..0648190467ba 100644 --- a/arch/x86/include/asm/kaslr.h +++ b/arch/x86/include/asm/kaslr.h @@ -6,8 +6,10 @@ unsigned long kaslr_get_random_long(const char *purpose); #ifdef CONFIG_RANDOMIZE_MEMORY void kernel_randomize_memory(void); +void init_trampoline_kaslr(void); #else static inline void kernel_randomize_memory(void) { } +static inline void init_trampoline_kaslr(void) {} #endif /* CONFIG_RANDOMIZE_MEMORY */ #endif diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 75f1e35e7c15..d1514e70477b 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -33,10 +33,12 @@ enum show_regs_mode { }; extern void die(const char *, struct pt_regs *,long); +void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_stack_regs(struct pt_regs *regs); -extern void __show_regs(struct pt_regs *regs, enum show_regs_mode); -extern void show_iret_regs(struct pt_regs *regs); +extern void __show_regs(struct pt_regs *regs, enum show_regs_mode, + const char *log_lvl); +extern void show_iret_regs(struct pt_regs *regs, const char *log_lvl); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 003f2daa3b0f..5cfb27f26583 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -9,23 +9,30 @@ # define PA_SWAP_PAGE 3 # define PAGES_NR 4 #else -# define PA_CONTROL_PAGE 0 -# define VA_CONTROL_PAGE 1 -# define PA_TABLE_PAGE 2 -# define PA_SWAP_PAGE 3 -# define PAGES_NR 4 +/* Size of each exception handler referenced by the IDT */ +# define KEXEC_DEBUG_EXC_HANDLER_SIZE 6 /* PUSHI, PUSHI, 2-byte JMP */ +#endif + +#ifdef CONFIG_X86_64 + +#include <linux/bits.h> + +#define RELOC_KERNEL_PRESERVE_CONTEXT BIT(0) +#define RELOC_KERNEL_CACHE_INCOHERENT BIT(1) + #endif +# define KEXEC_CONTROL_PAGE_SIZE 4096 # define KEXEC_CONTROL_CODE_MAX_SIZE 2048 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/string.h> #include <linux/kernel.h> +#include <asm/asm.h> #include <asm/page.h> #include <asm/ptrace.h> -#include <asm/bootparam.h> struct kimage; @@ -44,7 +51,6 @@ struct kimage; /* Maximum address we can use for the control code buffer */ # define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE -# define KEXEC_CONTROL_PAGE_SIZE 4096 /* The native architecture */ # define KEXEC_ARCH KEXEC_ARCH_386 @@ -59,32 +65,17 @@ struct kimage; /* Maximum address we can use for the control pages */ # define KEXEC_CONTROL_MEMORY_LIMIT (MAXMEM-1) -/* 
Allocate one page for the pdp and the second for the code */ -# define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL) - /* The native architecture */ # define KEXEC_ARCH KEXEC_ARCH_X86_64 -#endif -/* Memory to backup during crash kdump */ -#define KEXEC_BACKUP_SRC_START (0UL) -#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */ - -/* - * CPU does not save ss and sp on stack if execution is already - * running in kernel mode at the time of NMI occurrence. This code - * fixes it. - */ -static inline void crash_fixup_ss_esp(struct pt_regs *newregs, - struct pt_regs *oldregs) -{ -#ifdef CONFIG_X86_32 - newregs->sp = (unsigned long)&(oldregs->sp); - asm volatile("xorl %%eax, %%eax\n\t" - "movw %%ss, %%ax\n\t" - :"=a"(newregs->ss)); +extern unsigned long kexec_va_control_page; +extern unsigned long kexec_pa_table_page; +extern unsigned long kexec_pa_swap_page; +extern gate_desc kexec_debug_idt[]; +extern unsigned char kexec_debug_exc_vectors[]; +extern uint16_t kexec_debug_8250_port; +extern unsigned long kexec_debug_8250_mmio32; #endif -} /* * This function is responsible for capturing register states if coming @@ -96,63 +87,52 @@ static inline void crash_setup_regs(struct pt_regs *newregs, { if (oldregs) { memcpy(newregs, oldregs, sizeof(*newregs)); - crash_fixup_ss_esp(newregs, oldregs); } else { + asm volatile("mov %%" _ASM_BX ",%0" : "=m"(newregs->bx)); + asm volatile("mov %%" _ASM_CX ",%0" : "=m"(newregs->cx)); + asm volatile("mov %%" _ASM_DX ",%0" : "=m"(newregs->dx)); + asm volatile("mov %%" _ASM_SI ",%0" : "=m"(newregs->si)); + asm volatile("mov %%" _ASM_DI ",%0" : "=m"(newregs->di)); + asm volatile("mov %%" _ASM_BP ",%0" : "=m"(newregs->bp)); + asm volatile("mov %%" _ASM_AX ",%0" : "=m"(newregs->ax)); + asm volatile("mov %%" _ASM_SP ",%0" : "=m"(newregs->sp)); +#ifdef CONFIG_X86_64 + asm volatile("mov %%r8,%0" : "=m"(newregs->r8)); + asm volatile("mov %%r9,%0" : "=m"(newregs->r9)); + asm volatile("mov %%r10,%0" : "=m"(newregs->r10)); + asm volatile("mov %%r11,%0" : "=m"(newregs->r11)); + asm volatile("mov %%r12,%0" : "=m"(newregs->r12)); + asm volatile("mov %%r13,%0" : "=m"(newregs->r13)); + asm volatile("mov %%r14,%0" : "=m"(newregs->r14)); + asm volatile("mov %%r15,%0" : "=m"(newregs->r15)); +#endif + asm volatile("mov %%ss,%k0" : "=a"(newregs->ss)); + asm volatile("mov %%cs,%k0" : "=a"(newregs->cs)); #ifdef CONFIG_X86_32 - asm volatile("movl %%ebx,%0" : "=m"(newregs->bx)); - asm volatile("movl %%ecx,%0" : "=m"(newregs->cx)); - asm volatile("movl %%edx,%0" : "=m"(newregs->dx)); - asm volatile("movl %%esi,%0" : "=m"(newregs->si)); - asm volatile("movl %%edi,%0" : "=m"(newregs->di)); - asm volatile("movl %%ebp,%0" : "=m"(newregs->bp)); - asm volatile("movl %%eax,%0" : "=m"(newregs->ax)); - asm volatile("movl %%esp,%0" : "=m"(newregs->sp)); - asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss)); - asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs)); - asm volatile("movl %%ds, %%eax;" :"=a"(newregs->ds)); - asm volatile("movl %%es, %%eax;" :"=a"(newregs->es)); - asm volatile("pushfl; popl %0" :"=m"(newregs->flags)); -#else - asm volatile("movq %%rbx,%0" : "=m"(newregs->bx)); - asm volatile("movq %%rcx,%0" : "=m"(newregs->cx)); - asm volatile("movq %%rdx,%0" : "=m"(newregs->dx)); - asm volatile("movq %%rsi,%0" : "=m"(newregs->si)); - asm volatile("movq %%rdi,%0" : "=m"(newregs->di)); - asm volatile("movq %%rbp,%0" : "=m"(newregs->bp)); - asm volatile("movq %%rax,%0" : "=m"(newregs->ax)); - asm volatile("movq %%rsp,%0" : "=m"(newregs->sp)); - asm volatile("movq %%r8,%0" : 
"=m"(newregs->r8)); - asm volatile("movq %%r9,%0" : "=m"(newregs->r9)); - asm volatile("movq %%r10,%0" : "=m"(newregs->r10)); - asm volatile("movq %%r11,%0" : "=m"(newregs->r11)); - asm volatile("movq %%r12,%0" : "=m"(newregs->r12)); - asm volatile("movq %%r13,%0" : "=m"(newregs->r13)); - asm volatile("movq %%r14,%0" : "=m"(newregs->r14)); - asm volatile("movq %%r15,%0" : "=m"(newregs->r15)); - asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss)); - asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs)); - asm volatile("pushfq; popq %0" :"=m"(newregs->flags)); + asm volatile("mov %%ds,%k0" : "=a"(newregs->ds)); + asm volatile("mov %%es,%k0" : "=a"(newregs->es)); #endif + asm volatile("pushf\n\t" + "pop %0" : "=m"(newregs->flags)); newregs->ip = _THIS_IP_; } } #ifdef CONFIG_X86_32 -asmlinkage unsigned long -relocate_kernel(unsigned long indirection_page, - unsigned long control_page, - unsigned long start_address, - unsigned int has_pae, - unsigned int preserve_context); +typedef asmlinkage unsigned long +relocate_kernel_fn(unsigned long indirection_page, + unsigned long control_page, + unsigned long start_address, + unsigned int has_pae, + unsigned int preserve_context); #else -unsigned long -relocate_kernel(unsigned long indirection_page, - unsigned long page_list, - unsigned long start_address, - unsigned int preserve_context, - unsigned int sme_active); +typedef unsigned long +relocate_kernel_fn(unsigned long indirection_page, + unsigned long pa_control_page, + unsigned long start_address, + unsigned int flags); #endif - +extern relocate_kernel_fn relocate_kernel; #define ARCH_HAS_KIMAGE_ARCH #ifdef CONFIG_X86_32 @@ -167,21 +147,23 @@ struct kimage_arch { }; #else struct kimage_arch { + /* + * This is a kimage control page, as it must not overlap with either + * source or destination address ranges. + */ + pgd_t *pgd; + /* + * The virtual mapping of the control code page itself is used only + * during the transition, while the current kernel's pages are all + * in place. Thus the intermediate page table pages used to map it + * are not control pages, but instead just normal pages obtained + * with get_zeroed_page(). And have to be tracked (below) so that + * they can be freed. 
+ */ p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; - /* Details of backup region */ - unsigned long backup_src_start; - unsigned long backup_src_sz; - - /* Physical address of backup segment */ - unsigned long backup_load_addr; - - /* Core ELF header buffer */ - void *elf_headers; - unsigned long elf_headers_sz; - unsigned long elf_load_addr; }; #endif /* CONFIG_X86_32 */ @@ -218,12 +200,38 @@ extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages); #define arch_kexec_pre_free_pages arch_kexec_pre_free_pages +void arch_kexec_protect_crashkres(void); +#define arch_kexec_protect_crashkres arch_kexec_protect_crashkres + +void arch_kexec_unprotect_crashkres(void); +#define arch_kexec_unprotect_crashkres arch_kexec_unprotect_crashkres + +#ifdef CONFIG_KEXEC_FILE +struct purgatory_info; +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab); +#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add + +int arch_kimage_file_post_load_cleanup(struct kimage *image); +#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup +#endif #endif -typedef void crash_vmclear_fn(void); -extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss; extern void kdump_nmi_shootdown_cpus(void); -#endif /* __ASSEMBLY__ */ +#ifdef CONFIG_CRASH_HOTPLUG +void arch_crash_handle_hotplug_event(struct kimage *image, void *arg); +#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event + +int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags); +#define arch_crash_hotplug_support arch_crash_hotplug_support + +unsigned int arch_crash_get_elfcorehdr_size(void); +#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size +#endif + +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_KEXEC_H */ diff --git a/arch/x86/include/asm/kfence.h b/arch/x86/include/asm/kfence.h new file mode 100644 index 000000000000..ff5c7134a37a --- /dev/null +++ b/arch/x86/include/asm/kfence.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * x86 KFENCE support. + * + * Copyright (C) 2020, Google LLC. + */ + +#ifndef _ASM_X86_KFENCE_H +#define _ASM_X86_KFENCE_H + +#ifndef MODULE + +#include <linux/bug.h> +#include <linux/kfence.h> + +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/set_memory.h> +#include <asm/tlbflush.h> + +/* Force 4K pages for __kfence_pool. */ +static inline bool arch_kfence_init_pool(void) +{ + unsigned long addr; + + for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr); + addr += PAGE_SIZE) { + unsigned int level; + + if (!lookup_address(addr, &level)) + return false; + + if (level != PG_LEVEL_4K) + set_memory_4k(addr, 1); + } + + return true; +} + +/* Protect the given page and flush TLB. */ +static inline bool kfence_protect_page(unsigned long addr, bool protect) +{ + unsigned int level; + pte_t *pte = lookup_address(addr, &level); + + if (WARN_ON(!pte || level != PG_LEVEL_4K)) + return false; + + /* + * We need to avoid IPIs, as we may get KFENCE allocations or faults + * with interrupts disabled. Therefore, the below is best-effort, and + * does not flush TLBs on all CPUs. We can tolerate some inaccuracy; + * lazy fault handling takes care of faults after the page is PRESENT. 
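(What this protection toggle buys in practice: KFENCE surrounds each sampled allocation with guard pages that kfence_protect_page() keeps non-present, so an out-of-bounds access faults at the guilty instruction. An illustrative kernel-side sketch of mine, not from this patch; it only trips when the allocation happens to be served from __kfence_pool with the object placed against its guard page:)

	#include <linux/slab.h>

	static void kfence_oob_demo(void)
	{
		char *buf = kmalloc(32, GFP_KERNEL);

		if (!buf)
			return;
		buf[32] = 0;	/* one past the end: if this object sits
				 * against a guard page, the write hits a
				 * non-present PTE and KFENCE reports it at
				 * this very instruction */
		kfree(buf);
	}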
+ */ + + if (protect) + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); + else + set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); + + /* + * Flush this CPU's TLB, assuming whoever did the allocation/free is + * likely to continue running on this CPU. + */ + preempt_disable(); + flush_tlb_one_kernel(addr); + preempt_enable(); + return true; +} + +#endif /* !MODULE */ + +#endif /* _ASM_X86_KFENCE_H */ diff --git a/arch/x86/include/asm/kmap_types.h b/arch/x86/include/asm/kmap_types.h deleted file mode 100644 index 04ab8266e347..000000000000 --- a/arch/x86/include/asm/kmap_types.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_KMAP_TYPES_H -#define _ASM_X86_KMAP_TYPES_H - -#if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM) -#define __WITH_KM_FENCE -#endif - -#include <asm-generic/kmap_types.h> - -#undef __WITH_KM_FENCE - -#endif /* _ASM_X86_KMAP_TYPES_H */ diff --git a/arch/x86/include/asm/kmsan.h b/arch/x86/include/asm/kmsan.h new file mode 100644 index 000000000000..d91b37f5b4bb --- /dev/null +++ b/arch/x86/include/asm/kmsan.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * x86 KMSAN support. + * + * Copyright (C) 2022, Google LLC + * Author: Alexander Potapenko <glider@google.com> + */ + +#ifndef _ASM_X86_KMSAN_H +#define _ASM_X86_KMSAN_H + +#ifndef MODULE + +#include <asm/cpu_entry_area.h> +#include <asm/processor.h> +#include <linux/mmzone.h> + +DECLARE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_shadow); +DECLARE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_origin); + +/* + * Functions below are declared in the header to make sure they are inlined. + * They all are called from kmsan_get_metadata() for every memory access in + * the kernel, so speed is important here. + */ + +/* + * Compute metadata addresses for the CPU entry area on x86. + */ +static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin) +{ + unsigned long addr64 = (unsigned long)addr; + char *metadata_array; + unsigned long off; + int cpu; + + if ((addr64 < CPU_ENTRY_AREA_BASE) || + (addr64 >= (CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE))) + return NULL; + cpu = (addr64 - CPU_ENTRY_AREA_BASE) / CPU_ENTRY_AREA_SIZE; + off = addr64 - (unsigned long)get_cpu_entry_area(cpu); + if ((off < 0) || (off >= CPU_ENTRY_AREA_SIZE)) + return NULL; + metadata_array = is_origin ? cpu_entry_area_origin : + cpu_entry_area_shadow; + return &per_cpu(metadata_array[off], cpu); +} + +/* + * Taken from arch/x86/mm/physaddr.h to avoid using an instrumented version. + */ +static inline bool kmsan_phys_addr_valid(unsigned long addr) +{ + if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT)) + return !(addr >> boot_cpu_data.x86_phys_bits); + else + return true; +} + +/* + * Taken from arch/x86/mm/physaddr.c to avoid using an instrumented version. + */ +static inline bool kmsan_virt_addr_valid(void *addr) +{ + unsigned long x = (unsigned long)addr; + unsigned long y = x - __START_KERNEL_map; + bool ret; + + /* use the carry flag to determine if x was < __START_KERNEL_map */ + if (unlikely(x > y)) { + x = y + phys_base; + + if (y >= KERNEL_IMAGE_SIZE) + return false; + } else { + x = y + (__START_KERNEL_map - PAGE_OFFSET); + + /* carry flag will be set if starting x was >= PAGE_OFFSET */ + if ((x > y) || !kmsan_phys_addr_valid(x)) + return false; + } + + /* + * pfn_valid() relies on RCU, and may call into the scheduler on exiting + * the critical section. However, this would result in recursion with + * KMSAN. 
Therefore, disable preemption here, and re-enable preemption + * below while suppressing reschedules to avoid recursion. + * + * Note, this sacrifices occasionally breaking scheduling guarantees. + * Although, a kernel compiled with KMSAN has already given up on any + * performance guarantees due to being heavily instrumented. + */ + preempt_disable(); + ret = pfn_valid(x >> PAGE_SHIFT); + preempt_enable_no_resched(); + + return ret; +} + +#endif /* !MODULE */ + +#endif /* _ASM_X86_KMSAN_H */ diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index c8cec1b39b88..5939694dfb28 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -1,22 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_KPROBES_H #define _ASM_X86_KPROBES_H /* * Kernel Probes (KProbes) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright (C) IBM Corporation, 2002, 2004 * * See arch/x86/kernel/kprobes.c for x86 kprobes history. @@ -24,12 +11,11 @@ #include <asm-generic/kprobes.h> -#define BREAKPOINT_INSTRUCTION 0xcc - #ifdef CONFIG_KPROBES #include <linux/types.h> #include <linux/ptrace.h> #include <linux/percpu.h> +#include <asm/text-patching.h> #include <asm/insn.h> #define __ARCH_WANT_KPROBES_INSN_SLOT @@ -38,10 +24,7 @@ struct pt_regs; struct kprobe; typedef u8 kprobe_opcode_t; -#define RELATIVEJUMP_OPCODE 0xe9 -#define RELATIVEJUMP_SIZE 5 -#define RELATIVECALL_OPCODE 0xe8 -#define RELATIVE_ADDR_SIZE 4 + #define MAX_STACK_SIZE 64 #define CUR_STACK_SIZE(ADDR) \ (current_top_of_stack() - (unsigned long)(ADDR)) @@ -53,40 +36,55 @@ typedef u8 kprobe_opcode_t; /* optinsn template addresses */ extern __visible kprobe_opcode_t optprobe_template_entry[]; +extern __visible kprobe_opcode_t optprobe_template_clac[]; extern __visible kprobe_opcode_t optprobe_template_val[]; extern __visible kprobe_opcode_t optprobe_template_call[]; extern __visible kprobe_opcode_t optprobe_template_end[]; -#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE) +#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + DISP32_SIZE) #define MAX_OPTINSN_SIZE \ (((unsigned long)optprobe_template_end - \ (unsigned long)optprobe_template_entry) + \ - MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE) + MAX_OPTIMIZED_LENGTH + JMP32_INSN_SIZE) extern const int kretprobe_blacklist_size; void arch_remove_kprobe(struct kprobe *p); -asmlinkage void kretprobe_trampoline(void); - -extern void arch_kprobe_override_function(struct pt_regs *regs); /* Architecture specific copy of original instruction*/ struct arch_specific_insn { /* copy of the original instruction */ kprobe_opcode_t *insn; /* - * boostable = false: This instruction type is not boostable. - * boostable = true: This instruction has been boosted: we have + * boostable = 0: This instruction type is not boostable. 
+ * boostable = 1: This instruction has been boosted: we have * added a relative jump after the instruction copy in insn, * so no single-step and fixup are needed (unless there's * a post_handler). */ - bool boostable; - bool if_modifier; + unsigned boostable:1; + unsigned char size; /* The size of insn */ + union { + unsigned char opcode; + struct { + unsigned char type; + } jcc; + struct { + unsigned char type; + unsigned char asize; + } loop; + struct { + unsigned char reg; + } indirect; + }; + s32 rel32; /* relative offset must be s32, s16, or s8 */ + void (*emulate_op)(struct kprobe *p, struct pt_regs *regs); + /* Number of bytes of text poked */ + int tp_len; }; struct arch_optimized_insn { /* copy of the original instructions */ - kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE]; + kprobe_opcode_t copied_insn[DISP32_SIZE]; /* detour code buffer */ kprobe_opcode_t *insn; /* the size of instructions copied to detour code buffer */ @@ -115,10 +113,11 @@ struct kprobe_ctlblk { }; extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); -extern int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); extern int kprobe_int3_handler(struct pt_regs *regs); -extern int kprobe_debug_handler(struct pt_regs *regs); + +#else + +static inline int kprobe_debug_handler(struct pt_regs *regs) { return 0; } #endif /* CONFIG_KPROBES */ #endif /* _ASM_X86_KPROBES_H */ diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h new file mode 100644 index 000000000000..de709fb5bd76 --- /dev/null +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(KVM_X86_OP) || !defined(KVM_X86_OP_OPTIONAL) +BUILD_BUG_ON(1) +#endif + +/* + * KVM_X86_OP() and KVM_X86_OP_OPTIONAL() are used to help generate + * both DECLARE/DEFINE_STATIC_CALL() invocations and + * "static_call_update()" calls. + * + * KVM_X86_OP_OPTIONAL() can be used for those functions that can have + * a NULL definition. KVM_X86_OP_OPTIONAL_RET0() can be used likewise + * to make a definition optional, but in this case the default will + * be __static_call_return0. 
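(The include-site contract, to make the X-macro pattern concrete: every consumer defines the three macros before including this header, and the header #undefs them again at the bottom. A sketch of what a declaration-side consumer looks like; treat the exact expansion as my assumption, not a quote from KVM:)

	/* Declare one static_call() key per vendor op. */
	#define KVM_X86_OP(func) \
		DECLARE_STATIC_CALL(kvm_x86_##func, \
				    *(((struct kvm_x86_ops *)0)->func));
	#define KVM_X86_OP_OPTIONAL	KVM_X86_OP
	#define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP
	#include <asm/kvm-x86-ops.h>

The (((struct kvm_x86_ops *)0)->func) expression is never dereferenced; it exists only to hand the member's function type to DECLARE_STATIC_CALL().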
+ */ +KVM_X86_OP(check_processor_compatibility) +KVM_X86_OP(enable_virtualization_cpu) +KVM_X86_OP(disable_virtualization_cpu) +KVM_X86_OP(hardware_unsetup) +KVM_X86_OP(has_emulated_msr) +KVM_X86_OP(vcpu_after_set_cpuid) +KVM_X86_OP(vm_init) +KVM_X86_OP_OPTIONAL(vm_destroy) +KVM_X86_OP_OPTIONAL(vm_pre_destroy) +KVM_X86_OP_OPTIONAL_RET0(vcpu_precreate) +KVM_X86_OP(vcpu_create) +KVM_X86_OP(vcpu_free) +KVM_X86_OP(vcpu_reset) +KVM_X86_OP(prepare_switch_to_guest) +KVM_X86_OP(vcpu_load) +KVM_X86_OP(vcpu_put) +KVM_X86_OP(update_exception_bitmap) +KVM_X86_OP(get_msr) +KVM_X86_OP(set_msr) +KVM_X86_OP(get_segment_base) +KVM_X86_OP(get_segment) +KVM_X86_OP(get_cpl) +KVM_X86_OP(get_cpl_no_cache) +KVM_X86_OP(set_segment) +KVM_X86_OP(get_cs_db_l_bits) +KVM_X86_OP(is_valid_cr0) +KVM_X86_OP(set_cr0) +KVM_X86_OP_OPTIONAL(post_set_cr3) +KVM_X86_OP(is_valid_cr4) +KVM_X86_OP(set_cr4) +KVM_X86_OP(set_efer) +KVM_X86_OP(get_idt) +KVM_X86_OP(set_idt) +KVM_X86_OP(get_gdt) +KVM_X86_OP(set_gdt) +KVM_X86_OP(sync_dirty_debug_regs) +KVM_X86_OP(set_dr7) +KVM_X86_OP(cache_reg) +KVM_X86_OP(get_rflags) +KVM_X86_OP(set_rflags) +KVM_X86_OP(get_if_flag) +KVM_X86_OP(flush_tlb_all) +KVM_X86_OP(flush_tlb_current) +#if IS_ENABLED(CONFIG_HYPERV) +KVM_X86_OP_OPTIONAL(flush_remote_tlbs) +KVM_X86_OP_OPTIONAL(flush_remote_tlbs_range) +#endif +KVM_X86_OP(flush_tlb_gva) +KVM_X86_OP(flush_tlb_guest) +KVM_X86_OP(vcpu_pre_run) +KVM_X86_OP(vcpu_run) +KVM_X86_OP(handle_exit) +KVM_X86_OP(skip_emulated_instruction) +KVM_X86_OP_OPTIONAL(update_emulated_instruction) +KVM_X86_OP(set_interrupt_shadow) +KVM_X86_OP(get_interrupt_shadow) +KVM_X86_OP(patch_hypercall) +KVM_X86_OP(inject_irq) +KVM_X86_OP(inject_nmi) +KVM_X86_OP_OPTIONAL_RET0(is_vnmi_pending) +KVM_X86_OP_OPTIONAL_RET0(set_vnmi_pending) +KVM_X86_OP(inject_exception) +KVM_X86_OP(cancel_injection) +KVM_X86_OP(interrupt_allowed) +KVM_X86_OP(nmi_allowed) +KVM_X86_OP(get_nmi_mask) +KVM_X86_OP(set_nmi_mask) +KVM_X86_OP(enable_nmi_window) +KVM_X86_OP(enable_irq_window) +KVM_X86_OP_OPTIONAL(update_cr8_intercept) +KVM_X86_OP(refresh_apicv_exec_ctrl) +KVM_X86_OP_OPTIONAL(hwapic_isr_update) +KVM_X86_OP_OPTIONAL(load_eoi_exitmap) +KVM_X86_OP_OPTIONAL(set_virtual_apic_mode) +KVM_X86_OP_OPTIONAL(set_apic_access_page_addr) +KVM_X86_OP(deliver_interrupt) +KVM_X86_OP_OPTIONAL(sync_pir_to_irr) +KVM_X86_OP_OPTIONAL_RET0(set_tss_addr) +KVM_X86_OP_OPTIONAL_RET0(set_identity_map_addr) +KVM_X86_OP_OPTIONAL_RET0(get_mt_mask) +KVM_X86_OP(load_mmu_pgd) +KVM_X86_OP_OPTIONAL(link_external_spt) +KVM_X86_OP_OPTIONAL(set_external_spte) +KVM_X86_OP_OPTIONAL(free_external_spt) +KVM_X86_OP_OPTIONAL(remove_external_spte) +KVM_X86_OP(has_wbinvd_exit) +KVM_X86_OP(get_l2_tsc_offset) +KVM_X86_OP(get_l2_tsc_multiplier) +KVM_X86_OP(write_tsc_offset) +KVM_X86_OP(write_tsc_multiplier) +KVM_X86_OP(get_exit_info) +KVM_X86_OP(get_entry_info) +KVM_X86_OP(check_intercept) +KVM_X86_OP(handle_exit_irqoff) +KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging) +KVM_X86_OP_OPTIONAL(vcpu_blocking) +KVM_X86_OP_OPTIONAL(vcpu_unblocking) +KVM_X86_OP_OPTIONAL(pi_update_irte) +KVM_X86_OP_OPTIONAL(pi_start_bypass) +KVM_X86_OP_OPTIONAL(apicv_pre_state_restore) +KVM_X86_OP_OPTIONAL(apicv_post_state_restore) +KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt) +KVM_X86_OP_OPTIONAL(protected_apic_has_interrupt) +KVM_X86_OP_OPTIONAL(set_hv_timer) +KVM_X86_OP_OPTIONAL(cancel_hv_timer) +KVM_X86_OP(setup_mce) +#ifdef CONFIG_KVM_SMM +KVM_X86_OP(smi_allowed) +KVM_X86_OP(enter_smm) +KVM_X86_OP(leave_smm) +KVM_X86_OP(enable_smi_window) +#endif 
+KVM_X86_OP_OPTIONAL(dev_get_attr) +KVM_X86_OP_OPTIONAL(mem_enc_ioctl) +KVM_X86_OP_OPTIONAL(vcpu_mem_enc_ioctl) +KVM_X86_OP_OPTIONAL(vcpu_mem_enc_unlocked_ioctl) +KVM_X86_OP_OPTIONAL(mem_enc_register_region) +KVM_X86_OP_OPTIONAL(mem_enc_unregister_region) +KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from) +KVM_X86_OP_OPTIONAL(vm_move_enc_context_from) +KVM_X86_OP_OPTIONAL(guest_memory_reclaimed) +KVM_X86_OP(get_feature_msr) +KVM_X86_OP(check_emulate_instruction) +KVM_X86_OP(apic_init_signal_blocked) +KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush) +KVM_X86_OP_OPTIONAL(migrate_timers) +KVM_X86_OP(recalc_intercepts) +KVM_X86_OP(complete_emulated_msr) +KVM_X86_OP(vcpu_deliver_sipi_vector) +KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons); +KVM_X86_OP_OPTIONAL(get_untagged_addr) +KVM_X86_OP_OPTIONAL(alloc_apic_backing_page) +KVM_X86_OP_OPTIONAL_RET0(gmem_prepare) +KVM_X86_OP_OPTIONAL_RET0(gmem_max_mapping_level) +KVM_X86_OP_OPTIONAL(gmem_invalidate) + +#undef KVM_X86_OP +#undef KVM_X86_OP_OPTIONAL +#undef KVM_X86_OP_OPTIONAL_RET0 diff --git a/arch/x86/include/asm/kvm-x86-pmu-ops.h b/arch/x86/include/asm/kvm-x86-pmu-ops.h new file mode 100644 index 000000000000..9159bf1a4730 --- /dev/null +++ b/arch/x86/include/asm/kvm-x86-pmu-ops.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(KVM_X86_PMU_OP) || !defined(KVM_X86_PMU_OP_OPTIONAL) +BUILD_BUG_ON(1) +#endif + +/* + * KVM_X86_PMU_OP() and KVM_X86_PMU_OP_OPTIONAL() are used to help generate + * both DECLARE/DEFINE_STATIC_CALL() invocations and + * "static_call_update()" calls. + * + * KVM_X86_PMU_OP_OPTIONAL() can be used for those functions that can have + * a NULL definition. + */ +KVM_X86_PMU_OP(rdpmc_ecx_to_pmc) +KVM_X86_PMU_OP(msr_idx_to_pmc) +KVM_X86_PMU_OP_OPTIONAL(check_rdpmc_early) +KVM_X86_PMU_OP(is_valid_msr) +KVM_X86_PMU_OP(get_msr) +KVM_X86_PMU_OP(set_msr) +KVM_X86_PMU_OP(refresh) +KVM_X86_PMU_OP(init) +KVM_X86_PMU_OP_OPTIONAL(reset) +KVM_X86_PMU_OP_OPTIONAL(deliver_pmi) +KVM_X86_PMU_OP_OPTIONAL(cleanup) + +#undef KVM_X86_PMU_OP +#undef KVM_X86_PMU_OP_OPTIONAL diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h deleted file mode 100644 index 93c4bf598fb0..000000000000 --- a/arch/x86/include/asm/kvm_emulate.h +++ /dev/null @@ -1,457 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/****************************************************************************** - * x86_emulate.h - * - * Generic x86 (32-bit and 64-bit) instruction decoder and emulator. - * - * Copyright (c) 2005 Keir Fraser - * - * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4 - */ - -#ifndef _ASM_X86_KVM_X86_EMULATE_H -#define _ASM_X86_KVM_X86_EMULATE_H - -#include <asm/desc_defs.h> - -struct x86_emulate_ctxt; -enum x86_intercept; -enum x86_intercept_stage; - -struct x86_exception { - u8 vector; - bool error_code_valid; - u16 error_code; - bool nested_page_fault; - u64 address; /* cr2 or nested page fault gpa */ - u8 async_page_fault; -}; - -/* - * This struct is used to carry enough information from the instruction - * decoder to main KVM so that a decision can be made whether the - * instruction needs to be intercepted or not. - */ -struct x86_instruction_info { - u8 intercept; /* which intercept */ - u8 rep_prefix; /* rep prefix? 
*/ - u8 modrm_mod; /* mod part of modrm */ - u8 modrm_reg; /* index of register used */ - u8 modrm_rm; /* rm part of modrm */ - u64 src_val; /* value of source operand */ - u64 dst_val; /* value of destination operand */ - u8 src_bytes; /* size of source operand */ - u8 dst_bytes; /* size of destination operand */ - u8 ad_bytes; /* size of src/dst address */ - u64 next_rip; /* rip following the instruction */ -}; - -/* - * x86_emulate_ops: - * - * These operations represent the instruction emulator's interface to memory. - * There are two categories of operation: those that act on ordinary memory - * regions (*_std), and those that act on memory regions known to require - * special treatment or emulation (*_emulated). - * - * The emulator assumes that an instruction accesses only one 'emulated memory' - * location, that this location is the given linear faulting address (cr2), and - * that this is one of the instruction's data operands. Instruction fetches and - * stack operations are assumed never to access emulated memory. The emulator - * automatically deduces which operand of a string-move operation is accessing - * emulated memory, and assumes that the other operand accesses normal memory. - * - * NOTES: - * 1. The emulator isn't very smart about emulated vs. standard memory. - * 'Emulated memory' access addresses should be checked for sanity. - * 'Normal memory' accesses may fault, and the caller must arrange to - * detect and handle reentrancy into the emulator via recursive faults. - * Accesses may be unaligned and may cross page boundaries. - * 2. If the access fails (cannot emulate, or a standard access faults) then - * it is up to the memop to propagate the fault to the guest VM via - * some out-of-band mechanism, unknown to the emulator. The memop signals - * failure by returning X86EMUL_PROPAGATE_FAULT to the emulator, which will - * then immediately bail. - * 3. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only - * cmpxchg8b_emulated need support 8-byte accesses. - * 4. The emulator cannot handle 64-bit mode emulation on an x86/32 system. - */ -/* Access completed successfully: continue emulation as normal. */ -#define X86EMUL_CONTINUE 0 -/* Access is unhandleable: bail from emulation and return error to caller. */ -#define X86EMUL_UNHANDLEABLE 1 -/* Terminate emulation but return success to the caller. */ -#define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ -#define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */ -#define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ -#define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */ -#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */ - -struct x86_emulate_ops { - /* - * read_gpr: read a general purpose register (rax - r15) - * - * @reg: gpr number. - */ - ulong (*read_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg); - /* - * write_gpr: write a general purpose register (rax - r15) - * - * @reg: gpr number. - * @val: value to write. - */ - void (*write_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val); - /* - * read_std: Read bytes of standard (non-emulated/special) memory. - * Used for descriptor reading. - * @addr: [IN ] Linear address from which to read. - * @val: [OUT] Value read from memory, zero-extended to 'u_long'. - * @bytes: [IN ] Number of bytes to read from memory. - * @system:[IN ] Whether the access is forced to be at CPL0. 
- */ - int (*read_std)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, - unsigned int bytes, - struct x86_exception *fault, bool system); - - /* - * read_phys: Read bytes of standard (non-emulated/special) memory. - * Used for descriptor reading. - * @addr: [IN ] Physical address from which to read. - * @val: [OUT] Value read from memory. - * @bytes: [IN ] Number of bytes to read from memory. - */ - int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr, - void *val, unsigned int bytes); - - /* - * write_std: Write bytes of standard (non-emulated/special) memory. - * Used for descriptor writing. - * @addr: [IN ] Linear address to which to write. - * @val: [OUT] Value write to memory, zero-extended to 'u_long'. - * @bytes: [IN ] Number of bytes to write to memory. - * @system:[IN ] Whether the access is forced to be at CPL0. - */ - int (*write_std)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *fault, bool system); - /* - * fetch: Read bytes of standard (non-emulated/special) memory. - * Used for instruction fetch. - * @addr: [IN ] Linear address from which to read. - * @val: [OUT] Value read from memory, zero-extended to 'u_long'. - * @bytes: [IN ] Number of bytes to read from memory. - */ - int (*fetch)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *fault); - - /* - * read_emulated: Read bytes from emulated/special memory area. - * @addr: [IN ] Linear address from which to read. - * @val: [OUT] Value read from memory, zero-extended to 'u_long'. - * @bytes: [IN ] Number of bytes to read from memory. - */ - int (*read_emulated)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *fault); - - /* - * write_emulated: Write bytes to emulated/special memory area. - * @addr: [IN ] Linear address to which to write. - * @val: [IN ] Value to write to memory (low-order bytes used as - * required). - * @bytes: [IN ] Number of bytes to write to memory. - */ - int (*write_emulated)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, const void *val, - unsigned int bytes, - struct x86_exception *fault); - - /* - * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an - * emulated/special memory area. - * @addr: [IN ] Linear address to access. - * @old: [IN ] Value expected to be current at @addr. - * @new: [IN ] Value to write to @addr. - * @bytes: [IN ] Number of bytes to access using CMPXCHG. 
- */ - int (*cmpxchg_emulated)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, - const void *old, - const void *new, - unsigned int bytes, - struct x86_exception *fault); - void (*invlpg)(struct x86_emulate_ctxt *ctxt, ulong addr); - - int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt, - int size, unsigned short port, void *val, - unsigned int count); - - int (*pio_out_emulated)(struct x86_emulate_ctxt *ctxt, - int size, unsigned short port, const void *val, - unsigned int count); - - bool (*get_segment)(struct x86_emulate_ctxt *ctxt, u16 *selector, - struct desc_struct *desc, u32 *base3, int seg); - void (*set_segment)(struct x86_emulate_ctxt *ctxt, u16 selector, - struct desc_struct *desc, u32 base3, int seg); - unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt, - int seg); - void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); - void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); - void (*set_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); - void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); - ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); - int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); - int (*cpl)(struct x86_emulate_ctxt *ctxt); - int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); - int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); - u64 (*get_smbase)(struct x86_emulate_ctxt *ctxt); - void (*set_smbase)(struct x86_emulate_ctxt *ctxt, u64 smbase); - int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); - int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); - int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc); - int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata); - void (*halt)(struct x86_emulate_ctxt *ctxt); - void (*wbinvd)(struct x86_emulate_ctxt *ctxt); - int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); - int (*intercept)(struct x86_emulate_ctxt *ctxt, - struct x86_instruction_info *info, - enum x86_intercept_stage stage); - - bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx, - u32 *ecx, u32 *edx, bool check_limit); - void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); - - unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); - void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags); - int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, u64 smbase); - -}; - -typedef u32 __attribute__((vector_size(16))) sse128_t; - -/* Type, address-of, and value of an instruction's operand. */ -struct operand { - enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_MM, OP_NONE } type; - unsigned int bytes; - unsigned int count; - union { - unsigned long orig_val; - u64 orig_val64; - }; - union { - unsigned long *reg; - struct segmented_address { - ulong ea; - unsigned seg; - } mem; - unsigned xmm; - unsigned mm; - } addr; - union { - unsigned long val; - u64 val64; - char valptr[sizeof(sse128_t)]; - sse128_t vec_val; - u64 mm_val; - void *data; - }; -}; - -struct fetch_cache { - u8 data[15]; - u8 *ptr; - u8 *end; -}; - -struct read_cache { - u8 data[1024]; - unsigned long pos; - unsigned long end; -}; - -/* Execution mode, passed to the emulator. */ -enum x86emul_mode { - X86EMUL_MODE_REAL, /* Real mode. */ - X86EMUL_MODE_VM86, /* Virtual 8086 mode. */ - X86EMUL_MODE_PROT16, /* 16-bit protected mode. */ - X86EMUL_MODE_PROT32, /* 32-bit protected mode. */ - X86EMUL_MODE_PROT64, /* 64-bit (long) mode. 
*/ -}; - -/* These match some of the HF_* flags defined in kvm_host.h */ -#define X86EMUL_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ -#define X86EMUL_SMM_MASK (1 << 6) -#define X86EMUL_SMM_INSIDE_NMI_MASK (1 << 7) - -struct x86_emulate_ctxt { - const struct x86_emulate_ops *ops; - - /* Register state before/after emulation. */ - unsigned long eflags; - unsigned long eip; /* eip before instruction emulation */ - /* Emulated execution mode, represented by an X86EMUL_MODE value. */ - enum x86emul_mode mode; - - /* interruptibility state, as a result of execution of STI or MOV SS */ - int interruptibility; - - bool perm_ok; /* do not check permissions if true */ - bool ud; /* inject an #UD if host doesn't support insn */ - bool tf; /* TF value before instruction (after for syscall/sysret) */ - - bool have_exception; - struct x86_exception exception; - - /* - * decode cache - */ - - /* current opcode length in bytes */ - u8 opcode_len; - u8 b; - u8 intercept; - u8 op_bytes; - u8 ad_bytes; - struct operand src; - struct operand src2; - struct operand dst; - int (*execute)(struct x86_emulate_ctxt *ctxt); - int (*check_perm)(struct x86_emulate_ctxt *ctxt); - /* - * The following six fields are cleared together, - * the rest are initialized unconditionally in x86_decode_insn - * or elsewhere - */ - bool rip_relative; - u8 rex_prefix; - u8 lock_prefix; - u8 rep_prefix; - /* bitmaps of registers in _regs[] that can be read */ - u32 regs_valid; - /* bitmaps of registers in _regs[] that have been written */ - u32 regs_dirty; - /* modrm */ - u8 modrm; - u8 modrm_mod; - u8 modrm_reg; - u8 modrm_rm; - u8 modrm_seg; - u8 seg_override; - u64 d; - unsigned long _eip; - struct operand memop; - /* Fields above regs are cleared together. */ - unsigned long _regs[NR_VCPU_REGS]; - struct operand *memopp; - struct fetch_cache fetch; - struct read_cache io_read; - struct read_cache mem_read; -}; - -/* Repeat String Operation Prefix */ -#define REPE_PREFIX 0xf3 -#define REPNE_PREFIX 0xf2 - -/* CPUID vendors */ -#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541 -#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163 -#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65 - -#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx 0x69444d41 -#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx 0x21726574 -#define X86EMUL_CPUID_VENDOR_AMDisbetterI_edx 0x74656273 - -#define X86EMUL_CPUID_VENDOR_HygonGenuine_ebx 0x6f677948 -#define X86EMUL_CPUID_VENDOR_HygonGenuine_ecx 0x656e6975 -#define X86EMUL_CPUID_VENDOR_HygonGenuine_edx 0x6e65476e - -#define X86EMUL_CPUID_VENDOR_GenuineIntel_ebx 0x756e6547 -#define X86EMUL_CPUID_VENDOR_GenuineIntel_ecx 0x6c65746e -#define X86EMUL_CPUID_VENDOR_GenuineIntel_edx 0x49656e69 - -enum x86_intercept_stage { - X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */ - X86_ICPT_PRE_EXCEPT, - X86_ICPT_POST_EXCEPT, - X86_ICPT_POST_MEMACCESS, -}; - -enum x86_intercept { - x86_intercept_none, - x86_intercept_cr_read, - x86_intercept_cr_write, - x86_intercept_clts, - x86_intercept_lmsw, - x86_intercept_smsw, - x86_intercept_dr_read, - x86_intercept_dr_write, - x86_intercept_lidt, - x86_intercept_sidt, - x86_intercept_lgdt, - x86_intercept_sgdt, - x86_intercept_lldt, - x86_intercept_sldt, - x86_intercept_ltr, - x86_intercept_str, - x86_intercept_rdtsc, - x86_intercept_rdpmc, - x86_intercept_pushf, - x86_intercept_popf, - x86_intercept_cpuid, - x86_intercept_rsm, - x86_intercept_iret, - x86_intercept_intn, - x86_intercept_invd, - x86_intercept_pause, - x86_intercept_hlt, - 
x86_intercept_invlpg, - x86_intercept_invlpga, - x86_intercept_vmrun, - x86_intercept_vmload, - x86_intercept_vmsave, - x86_intercept_vmmcall, - x86_intercept_stgi, - x86_intercept_clgi, - x86_intercept_skinit, - x86_intercept_rdtscp, - x86_intercept_icebp, - x86_intercept_wbinvd, - x86_intercept_monitor, - x86_intercept_mwait, - x86_intercept_rdmsr, - x86_intercept_wrmsr, - x86_intercept_in, - x86_intercept_ins, - x86_intercept_out, - x86_intercept_outs, - - nr_x86_intercepts -}; - -/* Host execution mode. */ -#if defined(CONFIG_X86_32) -#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 -#elif defined(CONFIG_X86_64) -#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 -#endif - -int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); -bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); -#define EMULATION_FAILED -1 -#define EMULATION_OK 0 -#define EMULATION_RESTART 1 -#define EMULATION_INTERCEPTED 2 -void init_decode_cache(struct x86_emulate_ctxt *ctxt); -int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); -int emulator_task_switch(struct x86_emulate_ctxt *ctxt, - u16 tss_selector, int idt_index, int reason, - bool has_error_code, u32 error_code); -int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); -void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt); -void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt); -bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt); - -#endif /* _ASM_X86_KVM_X86_EMULATE_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4660ce90de7f..5a3bfa293e8b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1,11 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Kernel-based Virtual Machine driver for Linux * * This header defines architecture specific interfaces, x86 version - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * */ #ifndef _ASM_X86_KVM_HOST_H @@ -18,6 +15,7 @@ #include <linux/cpumask.h> #include <linux/irq_work.h> #include <linux/irq.h> +#include <linux/workqueue.h> #include <linux/kvm.h> #include <linux/kvm_para.h> @@ -26,43 +24,81 @@ #include <linux/pvclock_gtod.h> #include <linux/clocksource.h> #include <linux/irqbypass.h> -#include <linux/hyperv.h> +#include <linux/kfifo.h> +#include <linux/sched/vhost_task.h> +#include <linux/call_once.h> +#include <linux/atomic.h> #include <asm/apic.h> #include <asm/pvclock-abi.h> +#include <asm/debugreg.h> #include <asm/desc.h> #include <asm/mtrr.h> #include <asm/msr-index.h> +#include <asm/msr.h> #include <asm/asm.h> +#include <asm/irq_remapping.h> #include <asm/kvm_page_track.h> -#include <asm/hyperv-tlfs.h> +#include <asm/kvm_vcpu_regs.h> +#include <asm/reboot.h> +#include <hyperv/hvhdk.h> + +#define __KVM_HAVE_ARCH_VCPU_DEBUGFS + +/* + * CONFIG_KVM_MAX_NR_VCPUS is defined iff CONFIG_KVM!=n, provide a dummy max if + * KVM is disabled (arbitrarily use the default from CONFIG_KVM_MAX_NR_VCPUS). + */ +#ifdef CONFIG_KVM_MAX_NR_VCPUS +#define KVM_MAX_VCPUS CONFIG_KVM_MAX_NR_VCPUS +#else +#define KVM_MAX_VCPUS 1024 +#endif + +/* + * In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs + * might be larger than the actual number of VCPUs because the + * APIC ID encodes CPU topology information. + * + * In the worst case, we'll need less than one extra bit for the + * Core ID, and less than one extra bit for the Package (Die) ID, + * so ratio of 4 should be enough. 
+ */ +#define KVM_VCPU_ID_RATIO 4 +#define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO) -#define KVM_MAX_VCPUS 288 -#define KVM_SOFT_MAX_VCPUS 240 -#define KVM_MAX_VCPU_ID 1023 -#define KVM_USER_MEM_SLOTS 509 /* memory slots that are not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 3 -#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) +#define KVM_INTERNAL_MEM_SLOTS 3 #define KVM_HALT_POLL_NS_DEFAULT 200000 #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS +#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ + KVM_DIRTY_LOG_INITIALLY_SET) + +#define KVM_BUS_LOCK_DETECTION_VALID_MODE (KVM_BUS_LOCK_DETECTION_OFF | \ + KVM_BUS_LOCK_DETECTION_EXIT) + +#define KVM_X86_NOTIFY_VMEXIT_VALID_BITS (KVM_X86_NOTIFY_VMEXIT_ENABLED | \ + KVM_X86_NOTIFY_VMEXIT_USER) + /* x86-specific vcpu->requests bit members */ #define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0) #define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1) #define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2) #define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3) #define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4) -#define KVM_REQ_LOAD_CR3 KVM_ARCH_REQ(5) +#define KVM_REQ_LOAD_MMU_PGD KVM_ARCH_REQ(5) #define KVM_REQ_EVENT KVM_ARCH_REQ(6) #define KVM_REQ_APF_HALT KVM_ARCH_REQ(7) #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8) #define KVM_REQ_NMI KVM_ARCH_REQ(9) #define KVM_REQ_PMU KVM_ARCH_REQ(10) #define KVM_REQ_PMI KVM_ARCH_REQ(11) +#ifdef CONFIG_KVM_SMM #define KVM_REQ_SMI KVM_ARCH_REQ(12) +#endif #define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13) #define KVM_REQ_MCLOCK_INPROGRESS \ KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) @@ -77,7 +113,22 @@ #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) #define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23) -#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24) +#define KVM_REQ_GET_NESTED_STATE_PAGES KVM_ARCH_REQ(24) +#define KVM_REQ_APICV_UPDATE \ + KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26) +#define KVM_REQ_TLB_FLUSH_GUEST \ + KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_APF_READY KVM_ARCH_REQ(28) +#define KVM_REQ_RECALC_INTERCEPTS KVM_ARCH_REQ(29) +#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \ + KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \ + KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_HV_TLB_FLUSH \ + KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE \ + KVM_ARCH_REQ_FLAGS(34, KVM_REQUEST_WAIT) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ @@ -90,7 +141,8 @@ | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \ - | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP)) + | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \ + | X86_CR4_LAM_SUP | X86_CR4_CET)) #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) @@ -99,71 +151,56 @@ #define INVALID_PAGE (~(hpa_t)0) #define VALID_PAGE(x) ((x) != INVALID_PAGE) -#define UNMAPPED_GVA (~(gpa_t)0) - /* KVM Hugepage definitions for x86 */ -enum { - PT_PAGE_TABLE_LEVEL = 1, - PT_DIRECTORY_LEVEL = 2, - PT_PDPE_LEVEL = 3, - /* set max level to the biggest one */ - PT_MAX_HUGEPAGE_LEVEL = PT_PDPE_LEVEL, -}; -#define KVM_NR_PAGE_SIZES (PT_MAX_HUGEPAGE_LEVEL - \ - PT_PAGE_TABLE_LEVEL 
+ 1) +#define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G +#define KVM_NR_PAGE_SIZES (KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1) #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9) #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) -static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) -{ - /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ - return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - - (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); -} - -#define KVM_PERMILLE_MMU_PAGES 20 -#define KVM_MIN_ALLOC_MMU_PAGES 64 +#define KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO 50 +#define KVM_MIN_ALLOC_MMU_PAGES 64UL #define KVM_MMU_HASH_SHIFT 12 #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) #define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_REFILL_PAGES 25 -#define KVM_MAX_CPUID_ENTRIES 80 -#define KVM_NR_FIXED_MTRR_REGION 88 +#define KVM_MAX_CPUID_ENTRIES 256 #define KVM_NR_VAR_MTRR 8 #define ASYNC_PF_PER_VCPU 64 enum kvm_reg { - VCPU_REGS_RAX = 0, - VCPU_REGS_RCX = 1, - VCPU_REGS_RDX = 2, - VCPU_REGS_RBX = 3, - VCPU_REGS_RSP = 4, - VCPU_REGS_RBP = 5, - VCPU_REGS_RSI = 6, - VCPU_REGS_RDI = 7, + VCPU_REGS_RAX = __VCPU_REGS_RAX, + VCPU_REGS_RCX = __VCPU_REGS_RCX, + VCPU_REGS_RDX = __VCPU_REGS_RDX, + VCPU_REGS_RBX = __VCPU_REGS_RBX, + VCPU_REGS_RSP = __VCPU_REGS_RSP, + VCPU_REGS_RBP = __VCPU_REGS_RBP, + VCPU_REGS_RSI = __VCPU_REGS_RSI, + VCPU_REGS_RDI = __VCPU_REGS_RDI, #ifdef CONFIG_X86_64 - VCPU_REGS_R8 = 8, - VCPU_REGS_R9 = 9, - VCPU_REGS_R10 = 10, - VCPU_REGS_R11 = 11, - VCPU_REGS_R12 = 12, - VCPU_REGS_R13 = 13, - VCPU_REGS_R14 = 14, - VCPU_REGS_R15 = 15, + VCPU_REGS_R8 = __VCPU_REGS_R8, + VCPU_REGS_R9 = __VCPU_REGS_R9, + VCPU_REGS_R10 = __VCPU_REGS_R10, + VCPU_REGS_R11 = __VCPU_REGS_R11, + VCPU_REGS_R12 = __VCPU_REGS_R12, + VCPU_REGS_R13 = __VCPU_REGS_R13, + VCPU_REGS_R14 = __VCPU_REGS_R14, + VCPU_REGS_R15 = __VCPU_REGS_R15, #endif VCPU_REGS_RIP, - NR_VCPU_REGS -}; + NR_VCPU_REGS, -enum kvm_reg_ex { VCPU_EXREG_PDPTR = NR_VCPU_REGS, + VCPU_EXREG_CR0, VCPU_EXREG_CR3, + VCPU_EXREG_CR4, VCPU_EXREG_RFLAGS, VCPU_EXREG_SEGMENTS, + VCPU_EXREG_EXIT_INFO_1, + VCPU_EXREG_EXIT_INFO_2, }; enum { @@ -177,54 +214,79 @@ enum { VCPU_SREG_LDTR, }; -#include <asm/kvm_emulate.h> +enum exit_fastpath_completion { + EXIT_FASTPATH_NONE, + EXIT_FASTPATH_REENTER_GUEST, + EXIT_FASTPATH_EXIT_HANDLED, + EXIT_FASTPATH_EXIT_USERSPACE, +}; +typedef enum exit_fastpath_completion fastpath_t; -#define KVM_NR_MEM_OBJS 40 +struct x86_emulate_ctxt; +struct x86_exception; +union kvm_smram; +enum x86_intercept; +enum x86_intercept_stage; #define KVM_NR_DB_REGS 4 +#define DR6_BUS_LOCK (1 << 11) #define DR6_BD (1 << 13) #define DR6_BS (1 << 14) #define DR6_BT (1 << 15) #define DR6_RTM (1 << 16) -#define DR6_FIXED_1 0xfffe0ff0 -#define DR6_INIT 0xffff0ff0 -#define DR6_VOLATILE 0x0001e00f +/* + * DR6_ACTIVE_LOW combines fixed-1 and active-low bits. + * We can regard all the bits in DR6_FIXED_1 as active_low bits; + * they will never be 0 for now, but when they are defined + * in the future it will require no code change. + * + * DR6_ACTIVE_LOW is also used as the init/reset value for DR6. 
+ */
+#define DR6_ACTIVE_LOW	0xffff0ff0
+#define DR6_VOLATILE	0x0001e80f
+#define DR6_FIXED_1	(DR6_ACTIVE_LOW & ~DR6_VOLATILE)

 #define DR7_BP_EN_MASK	0x000000ff
 #define DR7_GE		(1 << 9)
 #define DR7_GD		(1 << 13)
-#define DR7_FIXED_1	0x00000400
 #define DR7_VOLATILE	0xffff2bff

-#define PFERR_PRESENT_BIT 0
-#define PFERR_WRITE_BIT 1
-#define PFERR_USER_BIT 2
-#define PFERR_RSVD_BIT 3
-#define PFERR_FETCH_BIT 4
-#define PFERR_PK_BIT 5
-#define PFERR_GUEST_FINAL_BIT 32
-#define PFERR_GUEST_PAGE_BIT 33
-
-#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
-#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
-#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
-#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
-#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
-#define PFERR_PK_MASK (1U << PFERR_PK_BIT)
-#define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT)
-#define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT)
-
-#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \
-				 PFERR_WRITE_MASK | \
-				 PFERR_PRESENT_MASK)
+#define KVM_GUESTDBG_VALID_MASK \
+	(KVM_GUESTDBG_ENABLE | \
+	KVM_GUESTDBG_SINGLESTEP | \
+	KVM_GUESTDBG_USE_HW_BP | \
+	KVM_GUESTDBG_USE_SW_BP | \
+	KVM_GUESTDBG_INJECT_BP | \
+	KVM_GUESTDBG_INJECT_DB | \
+	KVM_GUESTDBG_BLOCKIRQ)
+
+#define PFERR_PRESENT_MASK	BIT(0)
+#define PFERR_WRITE_MASK	BIT(1)
+#define PFERR_USER_MASK		BIT(2)
+#define PFERR_RSVD_MASK		BIT(3)
+#define PFERR_FETCH_MASK	BIT(4)
+#define PFERR_PK_MASK		BIT(5)
+#define PFERR_SS_MASK		BIT(6)
+#define PFERR_SGX_MASK		BIT(15)
+#define PFERR_GUEST_RMP_MASK	BIT_ULL(31)
+#define PFERR_GUEST_FINAL_MASK	BIT_ULL(32)
+#define PFERR_GUEST_PAGE_MASK	BIT_ULL(33)
+#define PFERR_GUEST_ENC_MASK	BIT_ULL(34)
+#define PFERR_GUEST_SIZEM_MASK	BIT_ULL(35)
+#define PFERR_GUEST_VMPL_MASK	BIT_ULL(36)

 /*
- * The mask used to denote special SPTEs, which can be either MMIO SPTEs or
- * Access Tracking SPTEs. We use bit 62 instead of bit 63 to avoid conflicting
- * with the SVE bit in EPT PTEs.
+ * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP checks
+ * when emulating instructions that trigger implicit access.
  */
-#define SPTE_SPECIAL_MASK (1ULL << 62)
+#define PFERR_IMPLICIT_ACCESS	BIT_ULL(48)
+/*
+ * PRIVATE_ACCESS is a KVM-defined flag used to indicate that a fault occurred
+ * when the guest was accessing private memory.
+ */
+#define PFERR_PRIVATE_ACCESS	BIT_ULL(49)
+#define PFERR_SYNTHETIC_MASK	(PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS)

 /* apic attention bits */
 #define KVM_APIC_CHECK_VAPIC	0
@@ -236,41 +298,66 @@ enum {
  */
 #define KVM_APIC_PV_EOI_PENDING	1

+struct kvm_kernel_irqfd;
 struct kvm_kernel_irq_routing_entry;

 /*
- * We don't want allocation failures within the mmu code, so we preallocate
- * enough memory for a single page fault in a cache.
- */
-struct kvm_mmu_memory_cache {
-	int nobjs;
-	void *objects[KVM_NR_MEM_OBJS];
-};
-
-/*
- * the pages used as guest page table on soft mmu are tracked by
- * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
- * by indirect shadow page can not be more than 15 bits.
+ * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
+ * also includes TDP pages) to determine whether or not a page can be used in
+ * the given MMU context.  This is a subset of the overall kvm_cpu_role to
+ * minimize the size of kvm_memory_slot.arch.gfn_write_track, i.e. allows
+ * allocating 2 bytes per gfn instead of 4 bytes per gfn.
+ *
+ * Upper-level shadow pages having gptes are tracked for write-protection via
+ * gfn_write_track.
As above, gfn_write_track is a 16 bit counter, so KVM must + * not create more than 2^16-1 upper-level shadow pages at a single gfn, + * otherwise gfn_write_track will overflow and explosions will ensue. * - * Currently, we used 14 bits that are @level, @cr4_pae, @quadrant, @access, - * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp. + * A unique shadow page (SP) for a gfn is created if and only if an existing SP + * cannot be reused. The ability to reuse a SP is tracked by its role, which + * incorporates various mode bits and properties of the SP. Roughly speaking, + * the number of unique SPs that can theoretically be created is 2^n, where n + * is the number of bits that are used to compute the role. + * + * But, even though there are 20 bits in the mask below, not all combinations + * of modes and flags are possible: + * + * - invalid shadow pages are not accounted, mirror pages are not shadowed, + * so the bits are effectively 18. + * + * - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging); + * execonly and ad_disabled are only used for nested EPT which has + * has_4_byte_gpte=0. Therefore, 2 bits are always unused. + * + * - the 4 bits of level are effectively limited to the values 2/3/4/5, + * as 4k SPs are not tracked (allowed to go unsync). In addition non-PAE + * paging has exactly one upper level, making level completely redundant + * when has_4_byte_gpte=1. + * + * - on top of this, smep_andnot_wp and smap_andnot_wp are only set if + * cr0_wp=0, therefore these three bits only give rise to 5 possibilities. + * + * Therefore, the maximum number of possible upper-level shadow pages for a + * single gfn is a bit less than 2^13. */ union kvm_mmu_page_role { u32 word; struct { unsigned level:4; - unsigned cr4_pae:1; + unsigned has_4_byte_gpte:1; unsigned quadrant:2; unsigned direct:1; unsigned access:3; unsigned invalid:1; - unsigned nxe:1; + unsigned efer_nx:1; unsigned cr0_wp:1; unsigned smep_andnot_wp:1; unsigned smap_andnot_wp:1; unsigned ad_disabled:1; unsigned guest_mode:1; - unsigned :6; + unsigned passthrough:1; + unsigned is_mirror:1; + unsigned :4; /* * This is left at the top of the word so that @@ -282,27 +369,40 @@ union kvm_mmu_page_role { }; }; -union kvm_mmu_extended_role { /* - * This structure complements kvm_mmu_page_role caching everything needed for - * MMU configuration. If nothing in both these structures changed, MMU - * re-configuration can be skipped. @valid bit is set on first usage so we don't - * treat all-zero structure as valid data. + * kvm_mmu_extended_role complements kvm_mmu_page_role, tracking properties + * relevant to the current MMU configuration. When loading CR0, CR4, or EFER, + * including on nested transitions, if nothing in the full role changes then + * MMU re-configuration can be skipped. @valid bit is set on first usage so we + * don't treat all-zero structure as valid data. + * + * The properties that are tracked in the extended role but not the page role + * are for things that either (a) do not affect the validity of the shadow page + * or (b) are indirectly reflected in the shadow page's role. For example, + * CR4.PKE only affects permission checks for software walks of the guest page + * tables (because KVM doesn't support Protection Keys with shadow paging), and + * CR0.PG, CR4.PAE, and CR4.PSE are indirectly reflected in role.level. + * + * Note, SMEP and SMAP are not redundant with sm*p_andnot_wp in the page role. 
+ * If CR0.WP=1, KVM can reuse shadow pages for the guest regardless of SMEP and + * SMAP, but the MMU's permission checks for software walks need to be SMEP and + * SMAP aware regardless of CR0.WP. */ +union kvm_mmu_extended_role { u32 word; struct { unsigned int valid:1; unsigned int execonly:1; - unsigned int cr0_pg:1; unsigned int cr4_pse:1; unsigned int cr4_pke:1; unsigned int cr4_smap:1; unsigned int cr4_smep:1; unsigned int cr4_la57:1; + unsigned int efer_lma:1; }; }; -union kvm_mmu_role { +union kvm_cpu_role { u64 as_u64; struct { union kvm_mmu_page_role base; @@ -311,43 +411,7 @@ union kvm_mmu_role { }; struct kvm_rmap_head { - unsigned long val; -}; - -struct kvm_mmu_page { - struct list_head link; - struct hlist_node hash_link; - bool unsync; - - /* - * The following two entries are used to key the shadow page in the - * hash table. - */ - union kvm_mmu_page_role role; - gfn_t gfn; - - u64 *spt; - /* hold the gfn of each spte inside spt */ - gfn_t *gfns; - int root_count; /* Currently serving as active root */ - unsigned int unsync_children; - struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ - - /* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. */ - unsigned long mmu_valid_gen; - - DECLARE_BITMAP(unsync_child_bitmap, 512); - -#ifdef CONFIG_X86_32 - /* - * Used out of the mmu-lock to avoid reading spte values while an - * update is in progress; see the comments in __get_spte_lockless(). - */ - int clear_spte_count; -#endif - - /* Number of writes since the last time traversal visited this page. */ - atomic_t write_flooding_count; + atomic_long_t val; }; struct kvm_pio_request { @@ -365,51 +429,44 @@ struct rsvd_bits_validate { }; struct kvm_mmu_root_info { - gpa_t cr3; + gpa_t pgd; hpa_t hpa; }; #define KVM_MMU_ROOT_INFO_INVALID \ - ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE }) + ((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE }) #define KVM_MMU_NUM_PREV_ROOTS 3 +#define KVM_MMU_ROOT_CURRENT BIT(0) +#define KVM_MMU_ROOT_PREVIOUS(i) BIT(1+i) +#define KVM_MMU_ROOTS_ALL (BIT(1 + KVM_MMU_NUM_PREV_ROOTS) - 1) + +#define KVM_HAVE_MMU_RWLOCK + +struct kvm_mmu_page; +struct kvm_page_fault; + /* * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit, * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the * current mmu mode. 
*/ struct kvm_mmu { - void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); - unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); + unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu); u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); - int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, - bool prefault); + int (*page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault); void (*inject_page_fault)(struct kvm_vcpu *vcpu, struct x86_exception *fault); - gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, + gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + gpa_t gva_or_gpa, u64 access, struct x86_exception *exception); - gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, - struct x86_exception *exception); - int (*sync_page)(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp); - void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa); - void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - u64 *spte, const void *pte); - hpa_t root_hpa; - union kvm_mmu_role mmu_role; - u8 root_level; - u8 shadow_root_level; - u8 ept_ad; - bool direct_map; - struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS]; - - /* - * Bitmap; bit set = permission fault - * Byte index: page fault error code [4:1] - * Bit index: pte permissions in ACC_* format - */ - u8 permissions[16]; + int (*sync_spte)(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp, int i); + struct kvm_mmu_root_info root; + hpa_t mirror_root_hpa; + union kvm_cpu_role cpu_role; + union kvm_mmu_page_role root_role; /* * The pkru_mask indicates if protection key checks are needed. It @@ -419,8 +476,18 @@ struct kvm_mmu { */ u32 pkru_mask; + struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS]; + + /* + * Bitmap; bit set = permission fault + * Byte index: page fault error code [4:1] + * Bit index: pte permissions in ACC_* format + */ + u8 permissions[16]; + u64 *pae_root; - u64 *lm_root; + u64 *pml4_root; + u64 *pml5_root; /* * check zero bits on shadow page table entries, these @@ -431,19 +498,9 @@ struct kvm_mmu { struct rsvd_bits_validate guest_rsvd_check; - /* Can have large pages at levels 2..last_nonleaf_level-1. */ - u8 last_nonleaf_level; - - bool nx; - u64 pdptrs[4]; /* pae */ }; -struct kvm_tlb_range { - u64 start_gfn; - u64 pages; -}; - enum pmc_type { KVM_PMC_GP = 0, KVM_PMC_FIXED, @@ -452,50 +509,128 @@ enum pmc_type { struct kvm_pmc { enum pmc_type type; u8 idx; + bool is_paused; + bool intr; + /* + * Base value of the PMC counter, relative to the *consumed* count in + * the associated perf_event. This value includes counter updates from + * the perf_event and emulated_count since the last time the counter + * was reprogrammed, but it is *not* the current value as seen by the + * guest or userspace. + * + * The count is relative to the associated perf_event so that KVM + * doesn't need to reprogram the perf_event every time the guest writes + * to the counter. + */ u64 counter; + /* + * PMC events triggered by KVM emulation that haven't been fully + * processed, i.e. haven't undergone overflow detection. + */ + u64 emulated_counter; u64 eventsel; struct perf_event *perf_event; struct kvm_vcpu *vcpu; + /* + * only for creating or reusing perf_event, + * eventsel value for general purpose counters, + * ctrl value for fixed counters. + */ + u64 current_config; }; +/* More counters may conflict with other existing Architectural MSRs */ +#define KVM_MAX(a, b) ((a) >= (b) ? 
(a) : (b))
+#define KVM_MAX_NR_INTEL_GP_COUNTERS	8
+#define KVM_MAX_NR_AMD_GP_COUNTERS	6
+#define KVM_MAX_NR_GP_COUNTERS	KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \
+				KVM_MAX_NR_AMD_GP_COUNTERS)
+
+#define KVM_MAX_NR_INTEL_FIXED_COUNTERS	3
+#define KVM_MAX_NR_AMD_FIXED_COUNTERS	0
+#define KVM_MAX_NR_FIXED_COUNTERS	KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUNTERS, \
+					KVM_MAX_NR_AMD_FIXED_COUNTERS)
+
 struct kvm_pmu {
+	u8 version;
 	unsigned nr_arch_gp_counters;
 	unsigned nr_arch_fixed_counters;
 	unsigned available_event_types;
 	u64 fixed_ctr_ctrl;
+	u64 fixed_ctr_ctrl_rsvd;
 	u64 global_ctrl;
 	u64 global_status;
-	u64 global_ovf_ctrl;
 	u64 counter_bitmask[2];
-	u64 global_ctrl_mask;
+	u64 global_ctrl_rsvd;
+	u64 global_status_rsvd;
 	u64 reserved_bits;
-	u8 version;
-	struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
-	struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
-	struct irq_work irq_work;
-	u64 reprogram_pmi;
+	u64 raw_event_mask;
+	struct kvm_pmc gp_counters[KVM_MAX_NR_GP_COUNTERS];
+	struct kvm_pmc fixed_counters[KVM_MAX_NR_FIXED_COUNTERS];
+
+	/*
+	 * Overlay the bitmap with a 64-bit atomic so that all bits can be
+	 * set in a single access, e.g. to reprogram all counters when the PMU
+	 * filter changes.
+	 */
+	union {
+		DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
+		atomic64_t __reprogram_pmi;
+	};
+	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
+	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
+
+	DECLARE_BITMAP(pmc_counting_instructions, X86_PMC_IDX_MAX);
+	DECLARE_BITMAP(pmc_counting_branches, X86_PMC_IDX_MAX);
+
+	u64 ds_area;
+	u64 pebs_enable;
+	u64 pebs_enable_rsvd;
+	u64 pebs_data_cfg;
+	u64 pebs_data_cfg_rsvd;
+
+	/*
+	 * If a guest counter is cross-mapped to a host counter with a
+	 * different index, its PEBS capability will be temporarily disabled.
+	 *
+	 * The user should make sure that this mask is updated
+	 * after disabling interrupts and before perf_guest_get_msrs();
+	 */
+	u64 host_cross_mapped_mask;
+
+	/*
+	 * The gate to release perf_events not marked in
+	 * pmc_in_use only once in a vcpu time slice.
+	 */
+	bool need_cleanup;
+
+	/*
+	 * The total number of programmed perf_events; it helps to avoid a
+	 * redundant check before cleanup if the guest doesn't use the vPMU
+	 * at all.
+	 */
+	u8 event_count;
 };

 struct kvm_pmu_ops;

 enum {
-	KVM_DEBUGREG_BP_ENABLED = 1,
-	KVM_DEBUGREG_WONT_EXIT = 2,
-	KVM_DEBUGREG_RELOAD = 4,
-};
-
-struct kvm_mtrr_range {
-	u64 base;
-	u64 mask;
-	struct list_head node;
+	KVM_DEBUGREG_BP_ENABLED = BIT(0),
+	KVM_DEBUGREG_WONT_EXIT = BIT(1),
+	/*
+	 * Guest debug registers (DR0-3, DR6 and DR7) are saved/restored by
+	 * hardware on exit from or entry to the guest. KVM needn't switch them.
+	 * DR0-3, DR6 and DR7 are set to their architectural INIT value on VM
+	 * exit; host values need to be restored.
+	 */
+	KVM_DEBUGREG_AUTO_SWITCH = BIT(2),
 };

 struct kvm_mtrr {
-	struct kvm_mtrr_range var_ranges[KVM_NR_VAR_MTRR];
-	mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
+	u64 var[KVM_NR_VAR_MTRR * 2];
+	u64 fixed_64k;
+	u64 fixed_16k[2];
+	u64 fixed_4k[8];
 	u64 deftype;
-
-	struct list_head head;
 };

 /* Hyper-V SynIC timer */
@@ -523,8 +658,32 @@ struct kvm_vcpu_hv_synic {
 	bool dont_zero_synic_pages;
 };

+/* The maximum number of entries on the TLB flush fifo. */
+#define KVM_HV_TLB_FLUSH_FIFO_SIZE (16)
+/*
+ * Note: the following 'magic' entry is made up by KVM to avoid putting
+ * anything besides GVA on the TLB flush fifo. It is theoretically possible
+ * to observe a request to flush 4095 PFNs starting from 0xfffffffffffff000
+ * which will look identical.
KVM's action to 'flush everything' instead of + * flushing these particular addresses is, however, fully legitimate as + * flushing more than requested is always OK. + */ +#define KVM_HV_TLB_FLUSHALL_ENTRY ((u64)-1) + +enum hv_tlb_flush_fifos { + HV_L1_TLB_FLUSH_FIFO, + HV_L2_TLB_FLUSH_FIFO, + HV_NR_TLB_FLUSH_FIFOS, +}; + +struct kvm_vcpu_hv_tlb_flush_fifo { + spinlock_t write_lock; + DECLARE_KFIFO(entries, u64, KVM_HV_TLB_FLUSH_FIFO_SIZE); +}; + /* Hyper-V per vcpu emulation context */ struct kvm_vcpu_hv { + struct kvm_vcpu *vcpu; u32 vp_index; u64 hv_vapic; s64 runtime_offset; @@ -532,7 +691,94 @@ struct kvm_vcpu_hv { struct kvm_hyperv_exit exit; struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT]; DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); - cpumask_t tlb_flush; + bool enforce_cpuid; + struct { + u32 features_eax; /* HYPERV_CPUID_FEATURES.EAX */ + u32 features_ebx; /* HYPERV_CPUID_FEATURES.EBX */ + u32 features_edx; /* HYPERV_CPUID_FEATURES.EDX */ + u32 enlightenments_eax; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */ + u32 enlightenments_ebx; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */ + u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */ + u32 nested_eax; /* HYPERV_CPUID_NESTED_FEATURES.EAX */ + u32 nested_ebx; /* HYPERV_CPUID_NESTED_FEATURES.EBX */ + } cpuid_cache; + + struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS]; + + /* + * Preallocated buffers for handling hypercalls that pass sparse vCPU + * sets (for high vCPU counts, they're too large to comfortably fit on + * the stack). + */ + u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS]; + DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); + + struct hv_vp_assist_page vp_assist_page; + + struct { + u64 pa_page_gpa; + u64 vm_id; + u32 vp_id; + } nested; +}; + +struct kvm_hypervisor_cpuid { + u32 base; + u32 limit; +}; + +#ifdef CONFIG_KVM_XEN +/* Xen HVM per vcpu emulation context */ +struct kvm_vcpu_xen { + u64 hypercall_rip; + u32 current_runstate; + u8 upcall_vector; + struct gfn_to_pfn_cache vcpu_info_cache; + struct gfn_to_pfn_cache vcpu_time_info_cache; + struct gfn_to_pfn_cache runstate_cache; + struct gfn_to_pfn_cache runstate2_cache; + u64 last_steal; + u64 runstate_entry_time; + u64 runstate_times[4]; + unsigned long evtchn_pending_sel; + u32 vcpu_id; /* The Xen / ACPI vCPU ID */ + u32 timer_virq; + u64 timer_expires; /* In guest epoch */ + atomic_t timer_pending; + struct hrtimer timer; + int poll_evtchn; + struct timer_list poll_timer; + struct kvm_hypervisor_cpuid cpuid; +}; +#endif + +struct kvm_queued_exception { + bool pending; + bool injected; + bool has_error_code; + u8 vector; + u32 error_code; + unsigned long payload; + bool has_payload; +}; + +/* + * Hardware-defined CPUID leafs that are either scattered by the kernel or are + * unknown to the kernel, but need to be directly used by KVM. Note, these + * word values conflict with the kernel's "bug" caps, but KVM doesn't use those. 
+ */ +enum kvm_only_cpuid_leafs { + CPUID_12_EAX = NCAPINTS, + CPUID_7_1_EDX, + CPUID_8000_0007_EDX, + CPUID_8000_0022_EAX, + CPUID_7_2_EDX, + CPUID_24_0_EBX, + CPUID_8000_0021_ECX, + CPUID_7_1_ECX, + NR_KVM_CPU_CAPS, + + NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, }; struct kvm_vcpu_arch { @@ -550,13 +796,15 @@ struct kvm_vcpu_arch { unsigned long cr3; unsigned long cr4; unsigned long cr4_guest_owned_bits; + unsigned long cr4_guest_rsvd_bits; unsigned long cr8; + u32 host_pkru; u32 pkru; u32 hflags; u64 efer; + u64 host_debugctl; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ - bool apicv_active; bool load_eoi_exitmap_pending; DECLARE_BITMAP(ioapic_handled_vectors, 256); unsigned long apic_attention; @@ -565,9 +813,12 @@ struct kvm_vcpu_arch { u64 ia32_misc_enable_msr; u64 smbase; u64 smi_count; + bool at_instruction_boundary; bool tpr_access_reporting; - u64 ia32_xss; + bool xfd_no_write_intercept; u64 microcode_version; + u64 arch_capabilities; + u64 perf_capabilities; /* * Paging state of the vcpu @@ -588,7 +839,7 @@ struct kvm_vcpu_arch { * Paging state of an L2 guest (used for nested npt) * * This context will save all necessary information to walk page tables - * of the an L2 guest. This context is only initialized for page table + * of an L2 guest. This context is only initialized for page table * walking and not for faulting since we never handle l2 page faults on * the host. */ @@ -601,41 +852,46 @@ struct kvm_vcpu_arch { struct kvm_mmu *walk_mmu; struct kvm_mmu_memory_cache mmu_pte_list_desc_cache; - struct kvm_mmu_memory_cache mmu_page_cache; + struct kvm_mmu_memory_cache mmu_shadow_page_cache; + struct kvm_mmu_memory_cache mmu_shadowed_info_cache; struct kvm_mmu_memory_cache mmu_page_header_cache; + /* + * This cache is to allocate external page table. E.g. private EPT used + * by the TDX module. + */ + struct kvm_mmu_memory_cache mmu_external_spt_cache; /* * QEMU userspace and the guest each have their own FPU state. - * In vcpu_run, we switch between the user, maintained in the - * task_struct struct, and guest FPU contexts. While running a VCPU, - * the VCPU thread will have the guest FPU context. + * In vcpu_run, we switch between the user and guest FPU contexts. + * While running a VCPU, the VCPU thread will have the guest FPU + * context. * * Note that while the PKRU state lives inside the fpu registers, * it is switched out separately at VMENTER and VMEXIT time. The - * "guest_fpu" state here contains the guest FPU context, with the + * "guest_fpstate" state here contains the guest FPU context, with the * host PRKU bits. */ - struct fpu *guest_fpu; + struct fpu_guest guest_fpu; u64 xcr0; u64 guest_supported_xcr0; - u32 guest_xstate_size; + u64 ia32_xss; + u64 guest_supported_xss; struct kvm_pio_request pio; void *pio_data; + void *sev_pio_data; + unsigned sev_pio_count; u8 event_exit_inst_len; - struct kvm_queued_exception { - bool pending; - bool injected; - bool has_error_code; - u8 nr; - u32 error_code; - unsigned long payload; - bool has_payload; - u8 nested_apf; - } exception; + bool exception_from_userspace; + + /* Exceptions to be injected to the guest. */ + struct kvm_queued_exception exception; + /* Exception VM-Exits to be synthesized to L1. 
*/ + struct kvm_queued_exception exception_vmexit; struct kvm_queued_interrupt { bool injected; @@ -646,33 +902,55 @@ struct kvm_vcpu_arch { int halt_request; /* real mode on Intel only */ int cpuid_nent; - struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES]; + struct kvm_cpuid_entry2 *cpuid_entries; + bool cpuid_dynamic_bits_dirty; + bool is_amd_compatible; + /* + * cpu_caps holds the effective guest capabilities, i.e. the features + * the vCPU is allowed to use. Typically, but not always, features can + * be used by the guest if and only if both KVM and userspace want to + * expose the feature to the guest. + * + * A common exception is for virtualization holes, i.e. when KVM can't + * prevent the guest from using a feature, in which case the vCPU "has" + * the feature regardless of what KVM or userspace desires. + * + * Note, features that don't require KVM involvement in any way are + * NOT enforced/sanitized by KVM, i.e. are taken verbatim from the + * guest CPUID provided by userspace. + */ + u32 cpu_caps[NR_KVM_CPU_CAPS]; + + u64 reserved_gpa_bits; int maxphyaddr; /* emulate context */ - struct x86_emulate_ctxt emulate_ctxt; + struct x86_emulate_ctxt *emulate_ctxt; bool emulate_regs_need_sync_to_vcpu; bool emulate_regs_need_sync_from_vcpu; int (*complete_userspace_io)(struct kvm_vcpu *vcpu); + unsigned long cui_linear_rip; + int cui_rdmsr_imm_reg; gpa_t time; - struct pvclock_vcpu_time_info hv_clock; + s8 pvclock_tsc_shift; + u32 pvclock_tsc_mul; unsigned int hw_tsc_khz; - struct gfn_to_hva_cache pv_time; - bool pv_time_enabled; + struct gfn_to_pfn_cache pv_time; /* set guest stopped flag in pvclock flags field */ bool pvclock_set_guest_stopped_request; struct { + u8 preempted; u64 msr_val; u64 last_steal; - struct gfn_to_hva_cache stime; - struct kvm_steal_time steal; + struct gfn_to_hva_cache cache; } st; - u64 tsc_offset; + u64 l1_tsc_offset; + u64 tsc_offset; /* current tsc offset */ u64 last_guest_tsc; u64 last_host_tsc; u64 tsc_offset_adjustment; @@ -685,12 +963,16 @@ struct kvm_vcpu_arch { u32 virtual_tsc_mult; u32 virtual_tsc_khz; s64 ia32_tsc_adjust_msr; - u64 tsc_scaling_ratio; + u64 msr_ia32_power_ctl; + u64 l1_tsc_scaling_ratio; + u64 tsc_scaling_ratio; /* current scaling ratio */ atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ - unsigned nmi_pending; /* NMI queued after currently running handler */ + /* Number of NMIs pending injection, not including hardware vNMIs. 
*/ + unsigned int nmi_pending; bool nmi_injected; /* Trying to inject an NMI this entry */ bool smi_pending; /* SMI queued after currently running handler */ + u8 handling_intr_from_guest; struct kvm_mtrr mtrr_state; u64 pat; @@ -709,10 +991,11 @@ struct kvm_vcpu_arch { u64 mcg_ctl; u64 mcg_ext_ctl; u64 *mce_banks; + u64 *mci_ctl2_banks; /* Cache MMIO info */ u64 mmio_gva; - unsigned access; + unsigned mmio_access; gfn_t mmio_gfn; u64 mmio_gen; @@ -721,8 +1004,13 @@ struct kvm_vcpu_arch { /* used for guest single stepping over the given code position */ unsigned long singlestep_rip; - struct kvm_vcpu_hv hyperv; - +#ifdef CONFIG_KVM_HYPERV + bool hyperv_enabled; + struct kvm_vcpu_hv *hyperv; +#endif +#ifdef CONFIG_KVM_XEN + struct kvm_vcpu_xen xen; +#endif cpumask_var_t wbinvd_dirty_mask; unsigned long last_retry_eip; @@ -730,14 +1018,16 @@ struct kvm_vcpu_arch { struct { bool halted; - gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; + gfn_t gfns[ASYNC_PF_PER_VCPU]; struct gfn_to_hva_cache data; - u64 msr_val; + u64 msr_en_val; /* MSR_KVM_ASYNC_PF_EN */ + u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */ + u16 vec; u32 id; - bool send_user_only; - u32 host_apf_reason; - unsigned long nested_apf_token; + u32 host_apf_flags; + bool send_always; bool delivery_as_pf_vmexit; + bool pageready_pending; } apf; /* OSVW MSRs (AMD only) */ @@ -751,15 +1041,7 @@ struct kvm_vcpu_arch { struct gfn_to_hva_cache data; } pv_eoi; - /* - * Indicate whether the access faults on its page table in guest - * which is set when fix page fault and used to detect unhandeable - * instruction. - */ - bool write_fault_to_shadow_pgtable; - - /* set at EPT violation at this point */ - unsigned long exit_qualification; + u64 msr_kvm_poll_control; /* pv related host specific info */ struct { @@ -768,16 +1050,45 @@ struct kvm_vcpu_arch { int pending_ioapic_eoi; int pending_external_vector; - - /* GPA available */ - bool gpa_available; - gpa_t gpa_val; + int highest_stale_pending_ioapic_eoi; /* be preempted when it's in kernel-mode(cpl=0) */ bool preempted_in_kernel; - /* Flush the L1 Data cache for L1TF mitigation on VMENTER */ - bool l1tf_flush_l1d; + /* Host CPU on which VM-entry was most recently attempted */ + int last_vmentry_cpu; + + /* AMD MSRC001_0015 Hardware Configuration */ + u64 msr_hwcr; + + /* pv related cpuid info */ + struct { + /* + * value of the eax register in the KVM_CPUID_FEATURES CPUID + * leaf. + */ + u32 features; + + /* + * indicates whether pv emulation should be disabled if features + * are not present in the guest's cpuid + */ + bool enforce; + } pv_cpuid; + + /* Protected Guests */ + bool guest_state_protected; + bool guest_tsc_protected; + + /* + * Set when PDPTS were loaded directly by the userspace without + * reading the guest memory + */ + bool pdptrs_from_userspace; + +#if IS_ENABLED(CONFIG_HYPERV) + hpa_t hv_root_tdp; +#endif }; struct kvm_lpage_info { @@ -787,23 +1098,34 @@ struct kvm_lpage_info { struct kvm_arch_memory_slot { struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES]; struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; - unsigned short *gfn_track[KVM_PAGE_TRACK_MAX]; + unsigned short *gfn_write_track; }; /* - * We use as the mode the number of bits allocated in the LDR for the - * logical processor ID. It happens that these are all powers of two. - * This makes it is very easy to detect cases where the APICs are - * configured for multiple modes; in that case, we cannot use the map and - * hence cannot use kvm_irq_delivery_to_apic_fast either. 
+ * Track the mode of the optimized logical map, as the rules for decoding the
+ * destination vary per mode.  Enabling the optimized logical map requires all
+ * software-enabled local APICs to be in the same mode, each addressable APIC
+ * to be mapped to only one MDA, and each MDA to map to at most one APIC.
  */
-#define KVM_APIC_MODE_XAPIC_CLUSTER	4
-#define KVM_APIC_MODE_XAPIC_FLAT	8
-#define KVM_APIC_MODE_X2APIC		16
+enum kvm_apic_logical_mode {
+	/* All local APICs are software disabled. */
+	KVM_APIC_MODE_SW_DISABLED,
+	/* All software enabled local APICs in xAPIC cluster addressing mode. */
+	KVM_APIC_MODE_XAPIC_CLUSTER,
+	/* All software enabled local APICs in xAPIC flat addressing mode. */
+	KVM_APIC_MODE_XAPIC_FLAT,
+	/* All software enabled local APICs in x2APIC mode. */
+	KVM_APIC_MODE_X2APIC,
+	/*
+	 * Optimized map disabled, e.g. not all local APICs in the same logical
+	 * mode, same logical ID assigned to multiple APICs, etc.
+	 */
+	KVM_APIC_MODE_MAP_DISABLED,
+};

 struct kvm_apic_map {
 	struct rcu_head rcu;
-	u8 mode;
+	enum kvm_apic_logical_mode logical_mode;
 	u32 max_apic_id;
 	union {
 		struct kvm_lapic *xapic_flat_map[8];
@@ -812,28 +1134,91 @@ struct kvm_apic_map {
 	struct kvm_lapic *phys_map[];
 };

+/* Hyper-V synthetic debugger (SynDbg) */
+struct kvm_hv_syndbg {
+	struct {
+		u64 control;
+		u64 status;
+		u64 send_page;
+		u64 recv_page;
+		u64 pending_page;
+	} control;
+	u64 options;
+};
+
+/* Current state of Hyper-V TSC page clocksource */
+enum hv_tsc_page_status {
+	/* TSC page was not set up or disabled */
+	HV_TSC_PAGE_UNSET = 0,
+	/* TSC page MSR was written by the guest, update pending */
+	HV_TSC_PAGE_GUEST_CHANGED,
+	/* TSC page update was triggered from the host side */
+	HV_TSC_PAGE_HOST_CHANGED,
+	/* TSC page was properly set up and is currently active */
+	HV_TSC_PAGE_SET,
+	/* TSC page was set up with an inaccessible GPA */
+	HV_TSC_PAGE_BROKEN,
+};
+
+#ifdef CONFIG_KVM_HYPERV
 /* Hyper-V emulation context */
 struct kvm_hv {
 	struct mutex hv_lock;
 	u64 hv_guest_os_id;
 	u64 hv_hypercall;
 	u64 hv_tsc_page;
+	enum hv_tsc_page_status hv_tsc_page_status;

 	/* Hyper-v based guest crash (NT kernel bugcheck) parameters */
 	u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
 	u64 hv_crash_ctl;

-	HV_REFERENCE_TSC_PAGE tsc_ref;
+	struct ms_hyperv_tsc_page tsc_ref;

 	struct idr conn_to_evt;

 	u64 hv_reenlightenment_control;
 	u64 hv_tsc_emulation_control;
 	u64 hv_tsc_emulation_status;
+	u64 hv_invtsc_control;

 	/* How many vCPUs have VP index != vCPU index */
 	atomic_t num_mismatched_vp_indexes;
+
+	/*
+	 * How many SynICs use 'AutoEOI' feature
+	 * (protected by arch.apicv_update_lock)
+	 */
+	unsigned int synic_auto_eoi_used;
+
+	struct kvm_hv_syndbg hv_syndbg;
+
+	bool xsaves_xsavec_checked;
 };
+#endif
+
+struct msr_bitmap_range {
+	u32 flags;
+	u32 nmsrs;
+	u32 base;
+	unsigned long *bitmap;
+};
+
+#ifdef CONFIG_KVM_XEN
+/* Xen emulation context */
+struct kvm_xen {
+	struct mutex xen_lock;
+	u32 xen_version;
+	bool long_mode;
+	bool runstate_update_flag;
+	u8 upcall_vector;
+	struct gfn_to_pfn_cache shinfo_cache;
+	struct idr evtchn_ports;
+	unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+
+	struct kvm_xen_hvm_config hvm_config;
+};
+#endif

 enum kvm_irqchip_mode {
	KVM_IRQCHIP_NONE,
@@ -841,72 +1226,241 @@
 	KVM_IRQCHIP_SPLIT,        /* created with KVM_CAP_SPLIT_IRQCHIP */
 };

-struct kvm_arch {
-	unsigned int n_used_mmu_pages;
-	unsigned int n_requested_mmu_pages;
-	unsigned int n_max_mmu_pages;
-	unsigned int indirect_shadow_pages;
-	unsigned long mmu_valid_gen;
-	struct
hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; +struct kvm_x86_msr_filter { + u8 count; + bool default_allow:1; + struct msr_bitmap_range ranges[16]; +}; + +struct kvm_x86_pmu_event_filter { + __u32 action; + __u32 nevents; + __u32 fixed_counter_bitmap; + __u32 flags; + __u32 nr_includes; + __u32 nr_excludes; + __u64 *includes; + __u64 *excludes; + __u64 events[]; +}; + +enum kvm_apicv_inhibit { + + /********************************************************************/ + /* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */ + /********************************************************************/ + + /* + * APIC acceleration is disabled by a module parameter + * and/or not supported in hardware. + */ + APICV_INHIBIT_REASON_DISABLED, + + /* + * APIC acceleration is inhibited because AutoEOI feature is + * being used by a HyperV guest. + */ + APICV_INHIBIT_REASON_HYPERV, + + /* + * APIC acceleration is inhibited because the userspace didn't yet + * enable the kernel/split irqchip. + */ + APICV_INHIBIT_REASON_ABSENT, + + /* APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ + * (out of band, debug measure of blocking all interrupts on this vCPU) + * was enabled, to avoid AVIC/APICv bypassing it. + */ + APICV_INHIBIT_REASON_BLOCKIRQ, + + /* + * APICv is disabled because not all vCPUs have a 1:1 mapping between + * APIC ID and vCPU, _and_ KVM is not applying its x2APIC hotplug hack. + */ + APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED, + /* - * Hash table of struct kvm_mmu_page. + * For simplicity, the APIC acceleration is inhibited the + * first time either the APIC ID or the APIC base is changed by the guest + * from their reset values. */ + APICV_INHIBIT_REASON_APIC_ID_MODIFIED, + APICV_INHIBIT_REASON_APIC_BASE_MODIFIED, + + /******************************************************/ + /* INHIBITs that are relevant only to the AMD's AVIC. */ + /******************************************************/ + + /* + * AVIC is inhibited on a vCPU because it runs a nested guest. + * + * This is needed because unlike APICv, the peers of this vCPU + * cannot use the doorbell mechanism to signal interrupts via AVIC when + * a vCPU runs nested. + */ + APICV_INHIBIT_REASON_NESTED, + + /* + * On SVM, the wait for the IRQ window is implemented with pending vIRQ, + * which cannot be injected when the AVIC is enabled, thus AVIC + * is inhibited while KVM waits for IRQ window. + */ + APICV_INHIBIT_REASON_IRQWIN, + + /* + * PIT (i8254) 're-inject' mode, relies on EOI intercept, + * which AVIC doesn't support for edge triggered interrupts. + */ + APICV_INHIBIT_REASON_PIT_REINJ, + + /* + * AVIC is disabled because SEV doesn't support it. + */ + APICV_INHIBIT_REASON_SEV, + + /* + * AVIC is disabled because not all vCPUs with a valid LDR have a 1:1 + * mapping between logical ID and vCPU. + */ + APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED, + + /* + * AVIC is disabled because the vCPU's APIC ID is beyond the max + * supported by AVIC/x2AVIC, i.e. the vCPU is unaddressable. 
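+ *
+ * Illustratively (a sketch with a hypothetical helper, not the exact
+ * vendor-module code), this inhibit would be tripped along the lines of:
+ *
+ *	if (kvm_x2apic_id(apic) > avic_max_physical_id())
+ *		kvm_set_apicv_inhibit(kvm,
+ *				      APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG);
+ *
+ * where avic_max_physical_id() stands in for however the AVIC/x2AVIC
+ * addressing limit is derived.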
+ */ + APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG, + + NR_APICV_INHIBIT_REASONS, +}; + +#define __APICV_INHIBIT_REASON(reason) \ + { BIT(APICV_INHIBIT_REASON_##reason), #reason } + +#define APICV_INHIBIT_REASONS \ + __APICV_INHIBIT_REASON(DISABLED), \ + __APICV_INHIBIT_REASON(HYPERV), \ + __APICV_INHIBIT_REASON(ABSENT), \ + __APICV_INHIBIT_REASON(BLOCKIRQ), \ + __APICV_INHIBIT_REASON(PHYSICAL_ID_ALIASED), \ + __APICV_INHIBIT_REASON(APIC_ID_MODIFIED), \ + __APICV_INHIBIT_REASON(APIC_BASE_MODIFIED), \ + __APICV_INHIBIT_REASON(NESTED), \ + __APICV_INHIBIT_REASON(IRQWIN), \ + __APICV_INHIBIT_REASON(PIT_REINJ), \ + __APICV_INHIBIT_REASON(SEV), \ + __APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED), \ + __APICV_INHIBIT_REASON(PHYSICAL_ID_TOO_BIG) + +struct kvm_possible_nx_huge_pages { + /* + * A list of kvm_mmu_page structs that, if zapped, could possibly be + * replaced by an NX huge page. A shadow page is on this list if its + * existence disallows an NX huge page (nx_huge_page_disallowed is set) + * and there are no other conditions that prevent a huge page, e.g. + * the backing host page is huge, dirty logging is not enabled for its + * memslot, etc... Note, zapping shadow pages on this list doesn't + * guarantee an NX huge page will be created in its stead, e.g. if the + * guest attempts to execute from the region then KVM obviously can't + * create an NX huge page (without hanging the guest). + */ + struct list_head pages; + u64 nr_pages; +}; + +enum kvm_mmu_type { + KVM_SHADOW_MMU, +#ifdef CONFIG_X86_64 + KVM_TDP_MMU, +#endif + KVM_NR_MMU_TYPES, +}; + +struct kvm_arch { + unsigned long n_used_mmu_pages; + unsigned long n_requested_mmu_pages; + unsigned long n_max_mmu_pages; + unsigned int indirect_shadow_pages; + u8 mmu_valid_gen; + u8 vm_type; + bool has_private_mem; + bool has_protected_state; + bool has_protected_eoi; + bool pre_fault_allowed; + struct hlist_head *mmu_page_hash; struct list_head active_mmu_pages; - struct list_head zapped_obsolete_pages; - struct kvm_page_track_notifier_node mmu_sp_tracker; + struct kvm_possible_nx_huge_pages possible_nx_huge_pages[KVM_NR_MMU_TYPES]; +#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING struct kvm_page_track_notifier_head track_notifier_head; +#endif + /* + * Protects marking pages unsync during page faults, as TDP MMU page + * faults only take mmu_lock for read. For simplicity, the unsync + * pages lock is always taken when marking pages unsync regardless of + * whether mmu_lock is held for read or write. 
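+ *
+ * A minimal sketch of the intended locking pattern (illustrative, not
+ * the exact call site):
+ *
+ *	spin_lock(&kvm->arch.mmu_unsync_pages_lock);
+ *	sp->unsync = true;
+ *	spin_unlock(&kvm->arch.mmu_unsync_pages_lock);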
+ */ + spinlock_t mmu_unsync_pages_lock; + + u64 shadow_mmio_value; - struct list_head assigned_dev_head; - struct iommu_domain *iommu_domain; - bool iommu_noncoherent; #define __KVM_HAVE_ARCH_NONCOHERENT_DMA atomic_t noncoherent_dma_count; -#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE - atomic_t assigned_device_count; + unsigned long nr_possible_bypass_irqs; + +#ifdef CONFIG_KVM_IOAPIC struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; +#endif atomic_t vapics_in_nmi_mode; struct mutex apic_map_lock; - struct kvm_apic_map *apic_map; + struct kvm_apic_map __rcu *apic_map; + atomic_t apic_map_dirty; - bool apic_access_page_done; + bool apic_access_memslot_enabled; + bool apic_access_memslot_inhibited; + + /* Protects apicv_inhibit_reasons */ + struct rw_semaphore apicv_update_lock; + unsigned long apicv_inhibit_reasons; gpa_t wall_clock; - bool mwait_in_guest; - bool hlt_in_guest; - bool pause_in_guest; + u64 disabled_exits; - unsigned long irq_sources_bitmap; s64 kvmclock_offset; + + /* + * This also protects nr_vcpus_matched_tsc which is read from a + * preemption-disabled region, so it must be a raw spinlock. + */ raw_spinlock_t tsc_write_lock; u64 last_tsc_nsec; u64 last_tsc_write; u32 last_tsc_khz; + u64 last_tsc_offset; u64 cur_tsc_nsec; u64 cur_tsc_write; u64 cur_tsc_offset; u64 cur_tsc_generation; int nr_vcpus_matched_tsc; - spinlock_t pvclock_gtod_sync_lock; + u32 default_tsc_khz; + bool user_set_tsc; + u64 apic_bus_cycle_ns; + + seqcount_raw_spinlock_t pvclock_sc; bool use_master_clock; u64 master_kernel_ns; u64 master_cycle_now; - struct delayed_work kvmclock_update_work; - struct delayed_work kvmclock_sync_work; - - struct kvm_xen_hvm_config xen_hvm_config; - - /* reads protected by irq_srcu, writes by irq_lock */ - struct hlist_head mask_notifier_list; +#ifdef CONFIG_KVM_HYPERV struct kvm_hv hyperv; +#endif - #ifdef CONFIG_KVM_MMU_AUDIT - int audit_point; - #endif +#ifdef CONFIG_KVM_XEN + struct kvm_xen xen; +#endif bool backwards_tsc_observed; bool boot_vcpu_runs_old_kvmclock; @@ -922,26 +1476,175 @@ struct kvm_arch { bool x2apic_format; bool x2apic_broadcast_quirk_disabled; + bool has_mapped_host_mmio; bool guest_can_read_msr_platform_info; bool exception_payload_enabled; + + bool triple_fault_event; + + bool bus_lock_detection_enabled; + bool enable_pmu; + + u32 notify_window; + u32 notify_vmexit_flags; + /* + * If exit_on_emulation_error is set, and the in-kernel instruction + * emulator fails to emulate an instruction, allow userspace + * the opportunity to look at it. + */ + bool exit_on_emulation_error; + + /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */ + u32 user_space_msr_mask; + struct kvm_x86_msr_filter __rcu *msr_filter; + + u32 hypercall_exit_enabled; + + /* Guest can access the SGX PROVISIONKEY. */ + bool sgx_provisioning_allowed; + + struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter; + struct vhost_task *nx_huge_page_recovery_thread; + u64 nx_huge_page_last; + struct once nx_once; + +#ifdef CONFIG_X86_64 +#ifdef CONFIG_KVM_PROVE_MMU + /* + * The number of TDP MMU pages across all roots. Used only to sanity + * check that KVM isn't leaking TDP MMU pages. + */ + atomic64_t tdp_mmu_pages; +#endif + + /* + * List of struct kvm_mmu_pages being used as roots. + * All struct kvm_mmu_pages in the list should have + * tdp_mmu_page set. 
+ * + * For reads, this list is protected by: + * RCU alone or + * the MMU lock in read mode + RCU or + * the MMU lock in write mode + * + * For writes, this list is protected by tdp_mmu_pages_lock; see + * below for the details. + * + * Roots will remain in the list until their tdp_mmu_root_count + * drops to zero, at which point the thread that decremented the + * count to zero should remove the root from the list and clean + * it up, freeing the root after an RCU grace period. + */ + struct list_head tdp_mmu_roots; + + /* + * Protects accesses to the following fields when the MMU lock + * is held in read mode: + * - tdp_mmu_roots (above) + * - the link field of kvm_mmu_page structs used by the TDP MMU + * - possible_nx_huge_pages[KVM_TDP_MMU]; + * - the possible_nx_huge_page_link field of kvm_mmu_page structs used + * by the TDP MMU + * Because the lock is only taken within the MMU lock, strictly + * speaking it is redundant to acquire this lock when the thread + * holds the MMU lock in write mode. However, it often simplifies + * the code to do so. + */ + spinlock_t tdp_mmu_pages_lock; +#endif /* CONFIG_X86_64 */ + + /* + * If set, at least one shadow root has been allocated. This flag + * is used as one input when determining whether certain memslot + * related allocations are necessary. + */ + bool shadow_root_allocated; + +#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING + /* + * If set, the VM has (or had) an external write tracking user, and + * thus all write tracking metadata has been allocated, even if KVM + * itself isn't using write tracking. + */ + bool external_write_tracking_enabled; +#endif + +#if IS_ENABLED(CONFIG_HYPERV) + hpa_t hv_root_tdp; + spinlock_t hv_root_tdp_lock; + struct hv_partition_assist_pg *hv_pa_pg; +#endif + /* + * VM-scope maximum vCPU ID. Used to determine the size of structures + * that increase along with the maximum vCPU ID, in which case, using + * the global KVM_MAX_VCPU_IDS may lead to significant memory waste. + */ + u32 max_vcpu_ids; + + bool disable_nx_huge_pages; + + /* + * Memory caches used to allocate shadow pages when performing eager + * page splitting. No need for a shadowed_info_cache since eager page + * splitting only allocates direct shadow pages. + * + * Protected by kvm->slots_lock. + */ + struct kvm_mmu_memory_cache split_shadow_page_cache; + struct kvm_mmu_memory_cache split_page_header_cache; + + /* + * Memory cache used to allocate pte_list_desc structs while splitting + * huge pages. In the worst case, to split one huge page, 512 + * pte_list_desc structs are needed to add each lower level leaf sptep + * to the rmap plus 1 to extend the parent_ptes rmap of the lower level + * page table. + * + * Protected by kvm->slots_lock. + */ +#define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1) + struct kvm_mmu_memory_cache split_desc_cache; + + gfn_t gfn_direct_bits; + + /* + * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A zero + * value indicates CPU dirty logging is unsupported or disabled in the + * current VM. 
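+ * With VMX's PML, for example, the log is a single 4-KByte page of
+ * 8-byte GPA entries, i.e. cpu_dirty_log_size is 512 when PML is in
+ * use.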
+ */ + int cpu_dirty_log_size; }; struct kvm_vm_stat { - ulong mmu_shadow_zapped; - ulong mmu_pte_write; - ulong mmu_pte_updated; - ulong mmu_pde_zapped; - ulong mmu_flooded; - ulong mmu_recycled; - ulong mmu_cache_miss; - ulong mmu_unsync; - ulong remote_tlb_flush; - ulong lpages; - ulong max_mmu_page_hash_collisions; + struct kvm_vm_stat_generic generic; + u64 mmu_shadow_zapped; + u64 mmu_pte_write; + u64 mmu_pde_zapped; + u64 mmu_flooded; + u64 mmu_recycled; + u64 mmu_cache_miss; + u64 mmu_unsync; + union { + struct { + atomic64_t pages_4k; + atomic64_t pages_2m; + atomic64_t pages_1g; + }; + atomic64_t pages[KVM_NR_PAGE_SIZES]; + }; + u64 nx_lpage_splits; + u64 max_mmu_page_hash_collisions; + u64 max_mmu_rmap_size; }; struct kvm_vcpu_stat { + struct kvm_vcpu_stat_generic generic; + u64 pf_taken; u64 pf_fixed; + u64 pf_emulate; + u64 pf_spurious; + u64 pf_fast; + u64 pf_mmio_spte_created; u64 pf_guest; u64 tlb_flush; u64 invlpg; @@ -954,10 +1657,6 @@ struct kvm_vcpu_stat { u64 nmi_window_exits; u64 l1d_flush; u64 halt_exits; - u64 halt_successful_poll; - u64 halt_attempted_poll; - u64 halt_poll_invalid; - u64 halt_wakeup; u64 request_irq_exits; u64 irq_exits; u64 host_state_reload; @@ -968,6 +1667,13 @@ struct kvm_vcpu_stat { u64 irq_injections; u64 nmi_injections; u64 req_event; + u64 nested_run; + u64 directed_yield_attempted; + u64 directed_yield_successful; + u64 preemption_reported; + u64 preemption_other; + u64 guest_mode; + u64 notify_window_exits; }; struct x86_instruction_info; @@ -989,65 +1695,86 @@ struct kvm_lapic_irq { bool msi_redir_hint; }; +static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) +{ + return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL; +} + +enum kvm_x86_run_flags { + KVM_RUN_FORCE_IMMEDIATE_EXIT = BIT(0), + KVM_RUN_LOAD_GUEST_DR6 = BIT(1), + KVM_RUN_LOAD_DEBUGCTL = BIT(2), +}; + struct kvm_x86_ops { - int (*cpu_has_kvm_support)(void); /* __init */ - int (*disabled_by_bios)(void); /* __init */ - int (*hardware_enable)(void); - void (*hardware_disable)(void); - void (*check_processor_compatibility)(void *rtn); - int (*hardware_setup)(void); /* __init */ - void (*hardware_unsetup)(void); /* __exit */ - bool (*cpu_has_accelerated_tpr)(void); - bool (*has_emulated_msr)(int index); - void (*cpuid_update)(struct kvm_vcpu *vcpu); - - struct kvm *(*vm_alloc)(void); - void (*vm_free)(struct kvm *); + const char *name; + + int (*check_processor_compatibility)(void); + + int (*enable_virtualization_cpu)(void); + void (*disable_virtualization_cpu)(void); + cpu_emergency_virt_cb *emergency_disable_virtualization_cpu; + + void (*hardware_unsetup)(void); + bool (*has_emulated_msr)(struct kvm *kvm, u32 index); + void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu); + + unsigned int vm_size; int (*vm_init)(struct kvm *kvm); void (*vm_destroy)(struct kvm *kvm); + void (*vm_pre_destroy)(struct kvm *kvm); /* Create, but do not attach this VCPU */ - struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); + int (*vcpu_precreate)(struct kvm *kvm); + int (*vcpu_create)(struct kvm_vcpu *vcpu); void (*vcpu_free)(struct kvm_vcpu *vcpu); void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event); - void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); + void (*prepare_switch_to_guest)(struct kvm_vcpu *vcpu); void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); - void (*update_bp_intercept)(struct kvm_vcpu *vcpu); + /* + * Mask of DEBUGCTL bits that are owned by the host, i.e. 
that need to + * match the host's value even while the guest is active. + */ + const u64 HOST_OWNED_DEBUGCTL; + + void (*update_exception_bitmap)(struct kvm_vcpu *vcpu); int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); void (*get_segment)(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int (*get_cpl)(struct kvm_vcpu *vcpu); + int (*get_cpl_no_cache)(struct kvm_vcpu *vcpu); void (*set_segment)(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); - void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); - void (*decache_cr3)(struct kvm_vcpu *vcpu); - void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); + bool (*is_valid_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); - void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); - int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); - void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); + void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); + bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); + void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); + int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); - u64 (*get_dr6)(struct kvm_vcpu *vcpu); - void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu); void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); - - void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa); - int (*tlb_remote_flush)(struct kvm *kvm); - int (*tlb_remote_flush_with_range)(struct kvm *kvm, - struct kvm_tlb_range *range); + bool (*get_if_flag)(struct kvm_vcpu *vcpu); + + void (*flush_tlb_all)(struct kvm_vcpu *vcpu); + void (*flush_tlb_current)(struct kvm_vcpu *vcpu); +#if IS_ENABLED(CONFIG_HYPERV) + int (*flush_remote_tlbs)(struct kvm *kvm); + int (*flush_remote_tlbs_range)(struct kvm *kvm, gfn_t gfn, + gfn_t nr_pages); +#endif /* * Flush any TLB entries associated with the given GVA. @@ -1055,145 +1782,194 @@ struct kvm_x86_ops { * Can potentially get non-canonical addresses through INVLPGs, which * the implementation may choose to ignore if appropriate. */ - void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr); + void (*flush_tlb_gva)(struct kvm_vcpu *vcpu, gva_t addr); - void (*run)(struct kvm_vcpu *vcpu); - int (*handle_exit)(struct kvm_vcpu *vcpu); - void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); + /* + * Flush any TLB entries created by the guest. Like tlb_flush_gva(), + * does not need to flush GPA->HPA mappings. 
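+ *
+ * A sketch of the expected caller-side usage (assuming a pending
+ * KVM_REQ_TLB_FLUSH_GUEST request; not the exact KVM code):
+ *
+ *	if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
+ *		kvm_x86_call(flush_tlb_guest)(vcpu);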
+ */ + void (*flush_tlb_guest)(struct kvm_vcpu *vcpu); + + int (*vcpu_pre_run)(struct kvm_vcpu *vcpu); + enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu, + u64 run_flags); + int (*handle_exit)(struct kvm_vcpu *vcpu, + enum exit_fastpath_completion exit_fastpath); + int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); + void (*update_emulated_instruction)(struct kvm_vcpu *vcpu); void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu); void (*patch_hypercall)(struct kvm_vcpu *vcpu, unsigned char *hypercall_addr); - void (*set_irq)(struct kvm_vcpu *vcpu); - void (*set_nmi)(struct kvm_vcpu *vcpu); - void (*queue_exception)(struct kvm_vcpu *vcpu); + void (*inject_irq)(struct kvm_vcpu *vcpu, bool reinjected); + void (*inject_nmi)(struct kvm_vcpu *vcpu); + void (*inject_exception)(struct kvm_vcpu *vcpu); void (*cancel_injection)(struct kvm_vcpu *vcpu); - int (*interrupt_allowed)(struct kvm_vcpu *vcpu); - int (*nmi_allowed)(struct kvm_vcpu *vcpu); + int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection); + int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); + /* Whether or not a virtual NMI is pending in hardware. */ + bool (*is_vnmi_pending)(struct kvm_vcpu *vcpu); + /* + * Attempt to pend a virtual NMI in hardware. Returns %true on success + * to allow using static_call_ret0 as the fallback. + */ + bool (*set_vnmi_pending)(struct kvm_vcpu *vcpu); void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); - bool (*get_enable_apicv)(struct kvm_vcpu *vcpu); + + const bool x2apic_icr_is_split; + const unsigned long required_apicv_inhibits; + bool allow_apicv_in_x2apic_without_x2apic_virtualization; void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); - void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); - bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); - void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); - void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); + void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu); + void (*deliver_interrupt)(struct kvm_lapic *apic, int delivery_mode, + int trig_mode, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr); - int (*get_tdp_level)(struct kvm_vcpu *vcpu); - u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); - int (*get_lpage_level)(void); - bool (*rdtscp_supported)(void); - bool (*invpcid_supported)(void); + u8 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); + + void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa, + int root_level); + + /* Update external mapping with page table link. */ + int (*link_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level, + void *external_spt); + /* Update the external page table from spte getting set. 
*/ + int (*set_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level, + u64 mirror_spte); - void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); + /* Update external page tables for page table about to be freed. */ + int (*free_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level, + void *external_spt); - void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); + /* Update external page table from spte getting removed, and flush TLB. */ + void (*remove_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level, + u64 mirror_spte); bool (*has_wbinvd_exit)(void); - u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu); - /* Returns actual tsc_offset set in active VMCS */ - u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); + u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu); + u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu); + void (*write_tsc_offset)(struct kvm_vcpu *vcpu); + void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu); - void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); + /* + * Retrieve somewhat arbitrary exit/entry information. Intended to + * be used only from within tracepoints or error paths. + */ + void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason, + u64 *info1, u64 *info2, + u32 *intr_info, u32 *error_code); + + void (*get_entry_info)(struct kvm_vcpu *vcpu, + u32 *intr_info, u32 *error_code); int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, - enum x86_intercept_stage stage); - void (*handle_external_intr)(struct kvm_vcpu *vcpu); - bool (*mpx_supported)(void); - bool (*xsaves_supported)(void); - bool (*umip_emulated)(void); - bool (*pt_supported)(void); - - int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); - void (*request_immediate_exit)(struct kvm_vcpu *vcpu); - - void (*sched_in)(struct kvm_vcpu *kvm, int cpu); + enum x86_intercept_stage stage, + struct x86_exception *exception); + void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu); - /* - * Arch-specific dirty logging hooks. These hooks are only supposed to - * be valid if the specific arch has hardware-accelerated dirty logging - * mechanism. Currently only for PML on VMX. - * - * - slot_enable_log_dirty: - * called when enabling log dirty mode for the slot. - * - slot_disable_log_dirty: - * called when disabling log dirty mode for the slot. - * also called when slot is created with log dirty disabled. - * - flush_log_dirty: - * called before reporting dirty_bitmap to userspace. - * - enable_log_dirty_pt_masked: - * called when reenabling log dirty for the GFNs in the mask after - * corresponding bits are cleared in slot->dirty_bitmap. - */ - void (*slot_enable_log_dirty)(struct kvm *kvm, - struct kvm_memory_slot *slot); - void (*slot_disable_log_dirty)(struct kvm *kvm, - struct kvm_memory_slot *slot); - void (*flush_log_dirty)(struct kvm *kvm); - void (*enable_log_dirty_pt_masked)(struct kvm *kvm, - struct kvm_memory_slot *slot, - gfn_t offset, unsigned long mask); - int (*write_log_dirty)(struct kvm_vcpu *vcpu); - - /* pmu operations of sub-arch */ - const struct kvm_pmu_ops *pmu_ops; + void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu); - /* - * Architecture specific hooks for vCPU blocking due to - * HLT instruction. - * Returns for .pre_block(): - * - 0 means continue to block the vCPU. - * - 1 means we cannot block the vCPU since some event - * happens during this period, such as, 'ON' bit in - * posted-interrupts descriptor is set. 
- */ - int (*pre_block)(struct kvm_vcpu *vcpu); - void (*post_block)(struct kvm_vcpu *vcpu); + const struct kvm_x86_nested_ops *nested_ops; void (*vcpu_blocking)(struct kvm_vcpu *vcpu); void (*vcpu_unblocking)(struct kvm_vcpu *vcpu); - int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set); + int (*pi_update_irte)(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, + unsigned int host_irq, uint32_t guest_irq, + struct kvm_vcpu *vcpu, u32 vector); + void (*pi_start_bypass)(struct kvm *kvm); + void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); + bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); + bool (*protected_apic_has_interrupt)(struct kvm_vcpu *vcpu); - int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc); + int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, + bool *expired); void (*cancel_hv_timer)(struct kvm_vcpu *vcpu); void (*setup_mce)(struct kvm_vcpu *vcpu); - int (*get_nested_state)(struct kvm_vcpu *vcpu, - struct kvm_nested_state __user *user_kvm_nested_state, - unsigned user_data_size); - int (*set_nested_state)(struct kvm_vcpu *vcpu, - struct kvm_nested_state __user *user_kvm_nested_state, - struct kvm_nested_state *kvm_state); - void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); +#ifdef CONFIG_KVM_SMM + int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); + int (*enter_smm)(struct kvm_vcpu *vcpu, union kvm_smram *smram); + int (*leave_smm)(struct kvm_vcpu *vcpu, const union kvm_smram *smram); + void (*enable_smi_window)(struct kvm_vcpu *vcpu); +#endif + + int (*dev_get_attr)(u32 group, u64 attr, u64 *val); + int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp); + int (*vcpu_mem_enc_ioctl)(struct kvm_vcpu *vcpu, void __user *argp); + int (*vcpu_mem_enc_unlocked_ioctl)(struct kvm_vcpu *vcpu, void __user *argp); + int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp); + int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp); + int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd); + int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd); + void (*guest_memory_reclaimed)(struct kvm *kvm); + + int (*get_feature_msr)(u32 msr, u64 *data); + + int (*check_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len); + + bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); + int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu); + + void (*migrate_timers)(struct kvm_vcpu *vcpu); + void (*recalc_intercepts)(struct kvm_vcpu *vcpu); + int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err); + + void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector); + + /* + * Returns vCPU specific APICv inhibit reasons + */ + unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu); - int (*smi_allowed)(struct kvm_vcpu *vcpu); - int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate); - int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase); - int (*enable_smi_window)(struct kvm_vcpu *vcpu); + gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags); + void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu); + int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order); + void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end); + int (*gmem_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn, bool is_private); +}; - int (*mem_enc_op)(struct kvm *kvm, void __user *argp); - 
int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp); - int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp); +struct kvm_x86_nested_ops { + void (*leave_nested)(struct kvm_vcpu *vcpu); + bool (*is_exception_vmexit)(struct kvm_vcpu *vcpu, u8 vector, + u32 error_code); + int (*check_events)(struct kvm_vcpu *vcpu); + bool (*has_events)(struct kvm_vcpu *vcpu, bool for_injection); + void (*triple_fault)(struct kvm_vcpu *vcpu); + int (*get_state)(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + unsigned user_data_size); + int (*set_state)(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + struct kvm_nested_state *kvm_state); + bool (*get_nested_state_pages)(struct kvm_vcpu *vcpu); + int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa); + + int (*enable_evmcs)(struct kvm_vcpu *vcpu, + uint16_t *vmcs_version); + uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu); + void (*hv_inject_synthetic_vmexit_post_tlb_flush)(struct kvm_vcpu *vcpu); +}; - int (*get_msr_feature)(struct kvm_msr_entry *entry); +struct kvm_x86_init_ops { + int (*hardware_setup)(void); + unsigned int (*handle_intel_pt_intr)(void); - int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu, - uint16_t *vmcs_version); - uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu); + struct kvm_x86_ops *runtime_ops; + struct kvm_pmu_ops *pmu_ops; }; struct kvm_arch_async_pf { @@ -1201,160 +1977,264 @@ struct kvm_arch_async_pf { gfn_t gfn; unsigned long cr3; bool direct_map; + u64 error_code; }; -extern struct kvm_x86_ops *kvm_x86_ops; -extern struct kmem_cache *x86_fpu_cache; +extern u32 __read_mostly kvm_nr_uret_msrs; +extern bool __read_mostly allow_smaller_maxphyaddr; +extern bool __read_mostly enable_apicv; +extern bool __read_mostly enable_ipiv; +extern bool __read_mostly enable_device_posted_irqs; +extern struct kvm_x86_ops kvm_x86_ops; + +#define kvm_x86_call(func) static_call(kvm_x86_##func) +#define kvm_pmu_call(func) static_call(kvm_x86_pmu_##func) + +#define KVM_X86_OP(func) \ + DECLARE_STATIC_CALL(kvm_x86_##func, *(((struct kvm_x86_ops *)0)->func)); +#define KVM_X86_OP_OPTIONAL KVM_X86_OP +#define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP +#include <asm/kvm-x86-ops.h> + +int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops); +void kvm_x86_vendor_exit(void); #define __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) { - return kvm_x86_ops->vm_alloc(); + return kvzalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT); } -static inline void kvm_arch_free_vm(struct kvm *kvm) -{ - return kvm_x86_ops->vm_free(kvm); -} +#define __KVM_HAVE_ARCH_VM_FREE +void kvm_arch_free_vm(struct kvm *kvm); -#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB -static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) +#if IS_ENABLED(CONFIG_HYPERV) +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS +static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { - if (kvm_x86_ops->tlb_remote_flush && - !kvm_x86_ops->tlb_remote_flush(kvm)) + if (kvm_x86_ops.flush_remote_tlbs && + !kvm_x86_call(flush_remote_tlbs)(kvm)) return 0; else return -ENOTSUPP; } -int kvm_mmu_module_init(void); -void kvm_mmu_module_exit(void); +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE +static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, + u64 nr_pages) +{ + if (!kvm_x86_ops.flush_remote_tlbs_range) + return -EOPNOTSUPP; + + return kvm_x86_call(flush_remote_tlbs_range)(kvm, gfn, nr_pages); +} +#endif /* CONFIG_HYPERV */ + +enum kvm_intr_type { 
+ /* Values are arbitrary, but must be non-zero. */ + KVM_HANDLING_IRQ = 1, + KVM_HANDLING_NMI, +}; + +/* Enable perf NMI and timer modes to work, and minimise false positives. */ +#define kvm_arch_pmi_in_guest(vcpu) \ + ((vcpu) && (vcpu)->arch.handling_intr_from_guest && \ + (!!in_nmi() == ((vcpu)->arch.handling_intr_from_guest == KVM_HANDLING_NMI))) + +void __init kvm_mmu_x86_module_init(void); +int kvm_mmu_vendor_module_init(void); +void kvm_mmu_vendor_module_exit(void); void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); -void kvm_mmu_init_vm(struct kvm *kvm); +int kvm_mmu_init_vm(struct kvm *kvm); void kvm_mmu_uninit_vm(struct kvm *kvm); -void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask, - u64 acc_track_mask, u64 me_mask); +void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm, + struct kvm_memory_slot *slot); + +void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu); void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, - struct kvm_memory_slot *memslot); -void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, - const struct kvm_memory_slot *memslot); + const struct kvm_memory_slot *memslot, + int start_level); +void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm, + const struct kvm_memory_slot *memslot, + int target_level); +void kvm_mmu_try_split_huge_pages(struct kvm *kvm, + const struct kvm_memory_slot *memslot, + u64 start, u64 end, + int target_level); +void kvm_mmu_recover_huge_pages(struct kvm *kvm, + const struct kvm_memory_slot *memslot); void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, - struct kvm_memory_slot *memslot); -void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm, - struct kvm_memory_slot *memslot); -void kvm_mmu_slot_set_dirty(struct kvm *kvm, - struct kvm_memory_slot *memslot); -void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, - struct kvm_memory_slot *slot, - gfn_t gfn_offset, unsigned long mask); -void kvm_mmu_zap_all(struct kvm *kvm); -void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots); -unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); -void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); - -int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3); -bool pdptrs_changed(struct kvm_vcpu *vcpu); + const struct kvm_memory_slot *memslot); +void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); +void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages); +void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end); + +int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, const void *val, int bytes); -struct kvm_irq_mask_notifier { - void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); - int irq; - struct hlist_node link; -}; - -void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, - struct kvm_irq_mask_notifier *kimn); -void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, - struct kvm_irq_mask_notifier *kimn); -void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, - bool mask); - extern bool tdp_enabled; u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); -/* control of guest tsc rate supported? 
*/ -extern bool kvm_has_tsc_control; -/* maximum supported tsc_khz for guests */ -extern u32 kvm_max_guest_tsc_khz; -/* number of bits of the fractional part of the TSC scaling ratio */ -extern u8 kvm_tsc_scaling_ratio_frac_bits; -/* maximum allowed value of TSC scaling ratio */ -extern u64 kvm_max_tsc_scaling_ratio; -/* 1ull << kvm_tsc_scaling_ratio_frac_bits */ -extern u64 kvm_default_tsc_scaling_ratio; - -extern u64 kvm_mce_cap_supported; - -enum emulation_result { - EMULATE_DONE, /* no further processing */ - EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */ - EMULATE_FAIL, /* can't emulate this instruction */ -}; - +/* + * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing + * userspace I/O) to indicate that the emulation context + * should be reused as is, i.e. skip initialization of + * emulation context, instruction fetch and decode. + * + * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware. + * Indicates that only select instructions (tagged with + * EmulateOnUD) should be emulated (to minimize the emulator + * attack surface). See also EMULTYPE_TRAP_UD_FORCED. + * + * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to + * decode the instruction length. For use *only* by + * kvm_x86_ops.skip_emulated_instruction() implementations if + * EMULTYPE_COMPLETE_USER_EXIT is not set. + * + * EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to + * retry native execution under certain conditions. + * Can only be set in conjunction with EMULTYPE_PF. + * + * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was + * triggered by KVM's magic "force emulation" prefix, + * which is opt in via module param (off by default). + * Bypasses EmulateOnUD restriction despite emulating + * due to an intercepted #UD (see EMULTYPE_TRAP_UD). + * Used to test the full emulator from userspace. + * + * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware + * backdoor emulation, which is opt in via module param. + * VMware backdoor emulation handles select instructions + * and reinjects the #GP for all other cases. + * + * EMULTYPE_PF - Set when an intercepted #PF triggers the emulation, in which case + * the CR2/GPA value passed on the stack is valid. + * + * EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility + * state and inject single-step #DBs after skipping + * an instruction (after completing userspace I/O). + * + * EMULTYPE_WRITE_PF_TO_SP - Set when emulating an intercepted page fault that + * is attempting to write a gfn that contains one or + * more of the PTEs used to translate the write itself, + * and the owning page table is being shadowed by KVM. + * If emulation of the faulting instruction fails and + * this flag is set, KVM will exit to userspace instead + * of retrying emulation as KVM cannot make forward + * progress. + * + * If emulation fails for a write to guest page tables, + * KVM unprotects (zaps) the shadow page for the target + * gfn and resumes the guest to retry the non-emulatable + * instruction (on hardware). Unprotecting the gfn + * doesn't allow forward progress for a self-changing + * access because doing so also zaps the translation for + * the gfn, i.e. retrying the instruction will hit a + * !PRESENT fault, which results in a new shadow page + * and sends KVM back to square one. 
+ * + * EMULTYPE_SKIP_SOFT_INT - Set in combination with EMULTYPE_SKIP to only skip + * an instruction if it could generate a given software + * interrupt, which must be encoded via + * EMULTYPE_SET_SOFT_INT_VECTOR(). + */ #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) -#define EMULTYPE_ALLOW_RETRY (1 << 3) -#define EMULTYPE_NO_UD_ON_FAIL (1 << 4) -#define EMULTYPE_VMWARE (1 << 5) +#define EMULTYPE_ALLOW_RETRY_PF (1 << 3) +#define EMULTYPE_TRAP_UD_FORCED (1 << 4) +#define EMULTYPE_VMWARE_GP (1 << 5) +#define EMULTYPE_PF (1 << 6) +#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7) +#define EMULTYPE_WRITE_PF_TO_SP (1 << 8) +#define EMULTYPE_SKIP_SOFT_INT (1 << 9) + +#define EMULTYPE_SET_SOFT_INT_VECTOR(v) ((u32)((v) & 0xff) << 16) +#define EMULTYPE_GET_SOFT_INT_VECTOR(e) (((e) >> 16) & 0xff) + +static inline bool kvm_can_emulate_event_vectoring(int emul_type) +{ + return !(emul_type & EMULTYPE_PF); +} + int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void *insn, int insn_len); +void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, + u64 *data, u8 ndata); +void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu); + +void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa); +void kvm_prepare_unexpected_reason_exit(struct kvm_vcpu *vcpu, u64 exit_reason); void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); -int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); -int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); - -struct x86_emulate_ctxt; +int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data); +int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data); +int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data); +int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data); +int kvm_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data); +int kvm_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data); +int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu); +int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg); +int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu); +int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg); +int kvm_emulate_as_nop(struct kvm_vcpu *vcpu); +int kvm_emulate_invd(struct kvm_vcpu *vcpu); +int kvm_emulate_mwait(struct kvm_vcpu *vcpu); +int kvm_handle_invalid_op(struct kvm_vcpu *vcpu); +int kvm_emulate_monitor(struct kvm_vcpu *vcpu); int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in); int kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); -int kvm_vcpu_halt(struct kvm_vcpu *vcpu); +int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu); +int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); +void kvm_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); +void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0); +void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4); int 
kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); -int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); +unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr); unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); -void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); -int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr); +int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr); +int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu); int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr); int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr); unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); -bool kvm_rdpmc(struct kvm_vcpu *vcpu); +int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu); void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); -void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); -void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); +void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload); +void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr, + bool has_error_code, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); -int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gfn_t gfn, void *data, int offset, int len, - u32 access); +void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, + struct x86_exception *fault); bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr); @@ -1370,59 +2250,68 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state, return !!(*irq_state); } -#define KVM_MMU_ROOT_CURRENT BIT(0) -#define KVM_MMU_ROOT_PREVIOUS(i) BIT(1+i) -#define KVM_MMU_ROOTS_ALL (~0UL) +void kvm_inject_nmi(struct kvm_vcpu *vcpu); +int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu); -int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level); -void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); +void kvm_update_dr7(struct kvm_vcpu *vcpu); -void kvm_inject_nmi(struct kvm_vcpu *vcpu); +bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + bool always_retry); -int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); -int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); -void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); -int kvm_mmu_load(struct kvm_vcpu *vcpu); -void kvm_mmu_unload(struct kvm_vcpu *vcpu); -void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); -void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, +static inline bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, + gpa_t cr2_or_gpa) +{ + return __kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa, false); +} + +void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu, ulong roots_to_free); -gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, - struct x86_exception *exception); +void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu); gpa_t kvm_mmu_gva_to_gpa_read(struct 
kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); -gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, - struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); -void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu); +bool kvm_apicv_activated(struct kvm *kvm); +bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu); +void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu); +void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason, bool set); +void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason, bool set); -int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); +static inline void kvm_set_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason) +{ + kvm_set_or_clear_apicv_inhibit(kvm, reason, true); +} -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code, +static inline void kvm_clear_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason) +{ + kvm_set_or_clear_apicv_inhibit(kvm, reason, false); +} + +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len); +void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); +void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + u64 addr, unsigned long roots); void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); -void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush); +void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd); -void kvm_enable_tdp(void); -void kvm_disable_tdp(void); +void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, + int tdp_max_root_level, int tdp_huge_page_level); -static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, - struct x86_exception *exception) -{ - return gpa; -} -static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) -{ - struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES +#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem) +#endif - return (struct kvm_mmu_page *)page_private(page); -} +#define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state) static inline u16 kvm_read_ldt(void) { @@ -1441,16 +2330,11 @@ static inline unsigned long read_msr(unsigned long msr) { u64 value; - rdmsrl(msr, value); + rdmsrq(msr, value); return value; } #endif -static inline u32 get_rdx_init_val(void) -{ - return 0x600; /* P6 family */ -} - static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) { kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); @@ -1470,113 +2354,90 @@ enum { TASK_SWITCH_GATE = 3, }; -#define HF_GIF_MASK (1 << 0) -#define HF_HIF_MASK (1 << 1) -#define HF_VINTR_MASK (1 << 2) -#define HF_NMI_MASK (1 << 3) -#define HF_IRET_MASK (1 << 4) -#define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ -#define HF_SMM_MASK (1 << 6) -#define HF_SMM_INSIDE_NMI_MASK (1 << 7) +#define HF_GUEST_MASK (1 << 0) /* VCPU is in guest-mode */ -#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE -#define KVM_ADDRESS_SPACE_NUM 2 +#ifdef CONFIG_KVM_SMM +#define HF_SMM_MASK (1 << 1) +#define HF_SMM_INSIDE_NMI_MASK (1 << 2) -#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 
1 : 0) -#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) +# define KVM_MAX_NR_ADDRESS_SPACES 2 +/* SMM is currently unsupported for guests with private memory. */ +# define kvm_arch_nr_memslot_as_ids(kvm) (kvm_arch_has_private_mem(kvm) ? 1 : 2) +# define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0) +# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) +#else +# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0) +#endif -/* - * Hardware virtualization extension instructions may fault if a - * reboot turns off virtualization while processes are running. - * Trap the fault and ignore the instruction if that happens. - */ -asmlinkage void kvm_spurious_fault(void); - -#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ - "666: " insn "\n\t" \ - "668: \n\t" \ - ".pushsection .fixup, \"ax\" \n" \ - "667: \n\t" \ - cleanup_insn "\n\t" \ - "cmpb $0, kvm_rebooting \n\t" \ - "jne 668b \n\t" \ - __ASM_SIZE(push) " $666b \n\t" \ - "jmp kvm_spurious_fault \n\t" \ - ".popsection \n\t" \ - _ASM_EXTABLE(666b, 667b) - -#define __kvm_handle_fault_on_reboot(insn) \ - ____kvm_handle_fault_on_reboot(insn, "") - -#define KVM_ARCH_WANT_MMU_NOTIFIER -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); -int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); -int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); -int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); +int kvm_cpu_has_extint(struct kvm_vcpu *v); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); +int kvm_cpu_get_extint(struct kvm_vcpu *v); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); -void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, unsigned long ipi_bitmap_high, u32 min, unsigned long icr, int op_64_bit); -u64 kvm_get_arch_capabilities(void); -void kvm_define_shared_msr(unsigned index, u32 msr); -int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); +int kvm_add_user_return_msr(u32 msr); +int kvm_find_user_return_msr(u32 msr); +int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); +u64 kvm_get_user_return_msr(unsigned int slot); -u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc); +static inline bool kvm_is_supported_user_return_msr(u32 msr) +{ + return kvm_find_user_return_msr(msr) >= 0; +} + +u64 kvm_scale_tsc(u64 tsc, u64 ratio); u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); +u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier); +u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier); unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); -void kvm_make_mclock_inprogress_request(struct kvm *kvm); void kvm_make_scan_ioapic_request(struct kvm *kvm); +void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, + unsigned long *vcpu_bitmap); -void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, +bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); -bool 
kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); +void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu); +bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu); extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu); int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); -void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu); - -int kvm_is_in_guest(void); -int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); -int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); +void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, + u32 size); bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); -bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, - struct kvm_vcpu **dest_vcpu); - -void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, - struct kvm_lapic_irq *irq); +static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) +{ + /* We can only post Fixed and LowPrio IRQs */ + return (irq->delivery_mode == APIC_DM_FIXED || + irq->delivery_mode == APIC_DM_LOWEST); +} static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) { - if (kvm_x86_ops->vcpu_blocking) - kvm_x86_ops->vcpu_blocking(vcpu); + kvm_x86_call(vcpu_blocking)(vcpu); } static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) { - if (kvm_x86_ops->vcpu_unblocking) - kvm_x86_ops->vcpu_unblocking(vcpu); + kvm_x86_call(vcpu_unblocking)(vcpu); } -static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} - static inline int kvm_cpu_get_apicid(int mps_cpu) { #ifdef CONFIG_X86_LOCAL_APIC @@ -1587,7 +2448,37 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) #endif } -#define put_smstate(type, buf, offset, val) \ - *(type *)((buf) + (offset) - 0x7e00) = val +int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); + +#define KVM_CLOCK_VALID_FLAGS \ + (KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC) + +#define KVM_X86_VALID_QUIRKS \ + (KVM_X86_QUIRK_LINT0_REENABLED | \ + KVM_X86_QUIRK_CD_NW_CLEARED | \ + KVM_X86_QUIRK_LAPIC_MMIO_HOLE | \ + KVM_X86_QUIRK_OUT_7E_INC_RIP | \ + KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \ + KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \ + KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \ + KVM_X86_QUIRK_SLOT_ZAP_ALL | \ + KVM_X86_QUIRK_STUFF_FEATURE_MSRS | \ + KVM_X86_QUIRK_IGNORE_GUEST_PAT) + +#define KVM_X86_CONDITIONAL_QUIRKS \ + (KVM_X86_QUIRK_CD_NW_CLEARED | \ + KVM_X86_QUIRK_IGNORE_GUEST_PAT) + +/* + * KVM previously used a u32 field in kvm_run to indicate the hypercall was + * initiated from long mode. KVM now sets bit 0 to indicate long mode, but the + * remaining 31 lower bits must be 0 to preserve ABI. 
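+ *
+ * An illustrative userspace-side check (a sketch; assumes a
+ * KVM_EXIT_HYPERCALL exit has already been selected):
+ *
+ *	if (run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ)
+ *		abort();	/* unknown flag, ABI violation */
+ *	long_mode = run->hypercall.flags & KVM_EXIT_HYPERCALL_LONG_MODE;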
+ */ +#define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1) + +static inline bool kvm_arch_has_irq_bypass(void) +{ + return enable_device_posted_irqs; +} #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h index 172f9749dbb2..3d040741044b 100644 --- a/arch/x86/include/asm/kvm_page_track.h +++ b/arch/x86/include/asm/kvm_page_track.h @@ -2,11 +2,9 @@ #ifndef _ASM_X86_KVM_PAGE_TRACK_H #define _ASM_X86_KVM_PAGE_TRACK_H -enum kvm_page_track_mode { - KVM_PAGE_TRACK_WRITE, - KVM_PAGE_TRACK_MAX, -}; +#include <linux/kvm_types.h> +#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING /* * The notifier represented by @kvm_page_track_notifier_node is linked into * the head which will be notified when guest is triggering the track event. @@ -26,50 +24,39 @@ struct kvm_page_track_notifier_node { * It is called when guest is writing the write-tracked page * and write emulation is finished at that time. * - * @vcpu: the vcpu where the write access happened. * @gpa: the physical address written by guest. * @new: the data was written to the address. * @bytes: the written length. * @node: this node */ - void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, - int bytes, struct kvm_page_track_notifier_node *node); + void (*track_write)(gpa_t gpa, const u8 *new, int bytes, + struct kvm_page_track_notifier_node *node); + /* - * It is called when memory slot is being moved or removed - * users can drop write-protection for the pages in that memory slot + * Invoked when a memory region is removed from the guest. Or in KVM + * terms, when a memslot is deleted. * - * @kvm: the kvm where memory slot being moved or removed - * @slot: the memory slot being moved or removed - * @node: this node + * @gfn: base gfn of the region being removed + * @nr_pages: number of pages in the to-be-removed region + * @node: this node */ - void (*track_flush_slot)(struct kvm *kvm, struct kvm_memory_slot *slot, - struct kvm_page_track_notifier_node *node); + void (*track_remove_region)(gfn_t gfn, unsigned long nr_pages, + struct kvm_page_track_notifier_node *node); }; -void kvm_page_track_init(struct kvm *kvm); -void kvm_page_track_cleanup(struct kvm *kvm); - -void kvm_page_track_free_memslot(struct kvm_memory_slot *free, - struct kvm_memory_slot *dont); -int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, - unsigned long npages); +int kvm_page_track_register_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n); +void kvm_page_track_unregister_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n); -void kvm_slot_page_track_add_page(struct kvm *kvm, - struct kvm_memory_slot *slot, gfn_t gfn, - enum kvm_page_track_mode mode); -void kvm_slot_page_track_remove_page(struct kvm *kvm, - struct kvm_memory_slot *slot, gfn_t gfn, - enum kvm_page_track_mode mode); -bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, - enum kvm_page_track_mode mode); +int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn); +int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn); +#else +/* + * Allow defining a node in a structure even if page tracking is disabled, e.g. + * to play nice with testing headers via direct inclusion from the command line. 
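+ *
+ * i.e. so that an external user along these lines (hypothetical
+ * structure, for illustration only) still compiles with page tracking
+ * disabled:
+ *
+ *	struct my_gpu_mdev {
+ *		struct kvm_page_track_notifier_node track_node;
+ *	};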
+ */ +struct kvm_page_track_notifier_node {}; +#endif /* CONFIG_KVM_EXTERNAL_WRITE_TRACKING */ -void -kvm_page_track_register_notifier(struct kvm *kvm, - struct kvm_page_track_notifier_node *n); -void -kvm_page_track_unregister_notifier(struct kvm *kvm, - struct kvm_page_track_notifier_node *n); -void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, - int bytes); -void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot); #endif diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 5ed3cf1c3934..4a47c16e2df8 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -4,9 +4,10 @@ #include <asm/processor.h> #include <asm/alternative.h> +#include <linux/interrupt.h> #include <uapi/asm/kvm_para.h> -extern void kvmclock_init(void); +#include <asm/tdx.h> #ifdef CONFIG_KVM_GUEST bool kvm_check_and_clear_guest_paused(void); @@ -18,7 +19,7 @@ static inline bool kvm_check_and_clear_guest_paused(void) #endif /* CONFIG_KVM_GUEST */ #define KVM_HYPERCALL \ - ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL) + ALTERNATIVE("vmcall", "vmmcall", X86_FEATURE_VMMCALL) /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall * instruction. The hypervisor may replace it with something else but only the @@ -33,6 +34,10 @@ static inline bool kvm_check_and_clear_guest_paused(void) static inline long kvm_hypercall0(unsigned int nr) { long ret; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return tdx_kvm_hypercall(nr, 0, 0, 0, 0); + asm volatile(KVM_HYPERCALL : "=a"(ret) : "a"(nr) @@ -43,6 +48,10 @@ static inline long kvm_hypercall0(unsigned int nr) static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) { long ret; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return tdx_kvm_hypercall(nr, p1, 0, 0, 0); + asm volatile(KVM_HYPERCALL : "=a"(ret) : "a"(nr), "b"(p1) @@ -54,6 +63,10 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, unsigned long p2) { long ret; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return tdx_kvm_hypercall(nr, p1, p2, 0, 0); + asm volatile(KVM_HYPERCALL : "=a"(ret) : "a"(nr), "b"(p1), "c"(p2) @@ -65,6 +78,10 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, unsigned long p2, unsigned long p3) { long ret; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return tdx_kvm_hypercall(nr, p1, p2, p3, 0); + asm volatile(KVM_HYPERCALL : "=a"(ret) : "a"(nr), "b"(p1), "c"(p2), "d"(p3) @@ -77,6 +94,10 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, unsigned long p4) { long ret; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return tdx_kvm_hypercall(nr, p1, p2, p3, p4); + asm volatile(KVM_HYPERCALL : "=a"(ret) : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4) @@ -84,15 +105,37 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, return ret; } +static inline long kvm_sev_hypercall3(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3) +{ + long ret; + + asm volatile("vmmcall" + : "=a"(ret) + : "a"(nr), "b"(p1), "c"(p2), "d"(p3) + : "memory"); + return ret; +} + #ifdef CONFIG_KVM_GUEST +void kvmclock_init(void); +void kvmclock_disable(void); bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); unsigned int kvm_arch_para_hints(void); -void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); -void kvm_async_pf_task_wake(u32 token); -u32 kvm_read_and_reset_pf_reason(void); -extern void 
kvm_disable_steal_time(void); -void do_async_page_fault(struct pt_regs *regs, unsigned long error_code); +void kvm_async_pf_task_wait_schedule(u32 token); +u32 kvm_read_and_reset_apf_flags(void); +bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token); + +DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled); + +static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token) +{ + if (static_branch_unlikely(&kvm_async_pf_enabled)) + return __kvm_handle_async_pf(regs, token); + else + return false; +} #ifdef CONFIG_PARAVIRT_SPINLOCKS void __init kvm_spinlock_init(void); @@ -103,8 +146,7 @@ static inline void kvm_spinlock_init(void) #endif /* CONFIG_PARAVIRT_SPINLOCKS */ #else /* CONFIG_KVM_GUEST */ -#define kvm_async_pf_task_wait(T, I) do {} while(0) -#define kvm_async_pf_task_wake(T) do {} while(0) +#define kvm_async_pf_task_wait_schedule(T) do {} while(0) static inline bool kvm_para_available(void) { @@ -121,14 +163,14 @@ static inline unsigned int kvm_arch_para_hints(void) return 0; } -static inline u32 kvm_read_and_reset_pf_reason(void) +static inline u32 kvm_read_and_reset_apf_flags(void) { return 0; } -static inline void kvm_disable_steal_time(void) +static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token) { - return; + return false; } #endif diff --git a/arch/x86/include/asm/kvm_types.h b/arch/x86/include/asm/kvm_types.h new file mode 100644 index 000000000000..d7c704ed1be9 --- /dev/null +++ b/arch/x86/include/asm/kvm_types.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_KVM_TYPES_H +#define _ASM_X86_KVM_TYPES_H + +#if IS_MODULE(CONFIG_KVM_AMD) && IS_MODULE(CONFIG_KVM_INTEL) +#define KVM_SUB_MODULES kvm-amd,kvm-intel +#elif IS_MODULE(CONFIG_KVM_AMD) +#define KVM_SUB_MODULES kvm-amd +#elif IS_MODULE(CONFIG_KVM_INTEL) +#define KVM_SUB_MODULES kvm-intel +#else +#undef KVM_SUB_MODULES +/* + * Don't export symbols for KVM without vendor modules, as kvm.ko is built iff + * at least one vendor module is enabled. 
+ */ +#define EXPORT_SYMBOL_FOR_KVM(symbol) +#endif + +#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40 + +#endif /* _ASM_X86_KVM_TYPES_H */ diff --git a/arch/x86/include/asm/kvm_vcpu_regs.h b/arch/x86/include/asm/kvm_vcpu_regs.h new file mode 100644 index 000000000000..1af2cb59233b --- /dev/null +++ b/arch/x86/include/asm/kvm_vcpu_regs.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_KVM_VCPU_REGS_H +#define _ASM_X86_KVM_VCPU_REGS_H + +#define __VCPU_REGS_RAX 0 +#define __VCPU_REGS_RCX 1 +#define __VCPU_REGS_RDX 2 +#define __VCPU_REGS_RBX 3 +#define __VCPU_REGS_RSP 4 +#define __VCPU_REGS_RBP 5 +#define __VCPU_REGS_RSI 6 +#define __VCPU_REGS_RDI 7 + +#ifdef CONFIG_X86_64 +#define __VCPU_REGS_R8 8 +#define __VCPU_REGS_R9 9 +#define __VCPU_REGS_R10 10 +#define __VCPU_REGS_R11 11 +#define __VCPU_REGS_R12 12 +#define __VCPU_REGS_R13 13 +#define __VCPU_REGS_R14 14 +#define __VCPU_REGS_R15 15 +#endif + +#endif /* _ASM_X86_KVM_VCPU_REGS_H */ diff --git a/arch/x86/include/asm/kvmclock.h b/arch/x86/include/asm/kvmclock.h index eceea9299097..f163176d6f7f 100644 --- a/arch/x86/include/asm/kvmclock.h +++ b/arch/x86/include/asm/kvmclock.h @@ -2,6 +2,18 @@ #ifndef _ASM_X86_KVM_CLOCK_H #define _ASM_X86_KVM_CLOCK_H -extern struct clocksource kvm_clock; +#include <linux/percpu.h> + +DECLARE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); + +static __always_inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) +{ + return &this_cpu_read(hv_clock_per_cpu)->pvti; +} + +static inline struct pvclock_vsyscall_time_info *this_cpu_hvclock(void) +{ + return this_cpu_read(hv_clock_per_cpu); +} #endif /* _ASM_X86_KVM_CLOCK_H */ diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 14caa9d9fb7f..9d38ae744a2e 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -3,26 +3,153 @@ #define _ASM_X86_LINKAGE_H #include <linux/stringify.h> +#include <asm/ibt.h> #undef notrace #define notrace __attribute__((no_instrument_function)) +#ifdef CONFIG_64BIT +/* + * The generic version tends to create spurious ENDBR instructions under + * certain conditions. 
+ */ +#define _THIS_IP_ ({ unsigned long __here; asm ("lea 0(%%rip), %0" : "=r" (__here)); __here; }) +#endif + #ifdef CONFIG_X86_32 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) #endif /* CONFIG_X86_32 */ -#ifdef __ASSEMBLY__ +#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT, 0x90; +#define __ALIGN_STR __stringify(__ALIGN) -#define GLOBAL(name) \ - .globl name; \ - name: +#if defined(CONFIG_CALL_PADDING) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define FUNCTION_PADDING .skip CONFIG_FUNCTION_ALIGNMENT, 0x90; +#else +#define FUNCTION_PADDING +#endif -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16) -#define __ALIGN .p2align 4, 0x90 -#define __ALIGN_STR __stringify(__ALIGN) +#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +# define __FUNC_ALIGN __ALIGN; FUNCTION_PADDING +#else +# define __FUNC_ALIGN __ALIGN +#endif + +#define ASM_FUNC_ALIGN __stringify(__FUNC_ALIGN) +#define SYM_F_ALIGN __FUNC_ALIGN + +#ifdef __ASSEMBLER__ + +#if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define RET jmp __x86_return_thunk +#else /* CONFIG_MITIGATION_RETPOLINE */ +#ifdef CONFIG_MITIGATION_SLS +#define RET ret; int3 +#else +#define RET ret #endif +#endif /* CONFIG_MITIGATION_RETPOLINE */ + +#else /* __ASSEMBLER__ */ + +#if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define ASM_RET "jmp __x86_return_thunk\n\t" +#else /* CONFIG_MITIGATION_RETPOLINE */ +#ifdef CONFIG_MITIGATION_SLS +#define ASM_RET "ret; int3\n\t" +#else +#define ASM_RET "ret\n\t" +#endif +#endif /* CONFIG_MITIGATION_RETPOLINE */ + +#endif /* __ASSEMBLER__ */ + +/* + * Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_PADDING) the + * CFI symbol layout changes. + * + * Without CALL_THUNKS: + * + * .align FUNCTION_ALIGNMENT + * __cfi_##name: + * .skip FUNCTION_PADDING, 0x90 + * .byte 0xb8 + * .long __kcfi_typeid_##name + * name: + * + * With CALL_THUNKS: + * + * .align FUNCTION_ALIGNMENT + * __cfi_##name: + * .byte 0xb8 + * .long __kcfi_typeid_##name + * .skip FUNCTION_PADDING, 0x90 + * name: + * + * In both cases the whole thing is FUNCTION_ALIGNMENT aligned and sized. + */ -#endif /* __ASSEMBLY__ */ +#ifdef CONFIG_CALL_PADDING +#define CFI_PRE_PADDING +#define CFI_POST_PADDING .skip CONFIG_FUNCTION_PADDING_BYTES, 0x90; +#else +#define CFI_PRE_PADDING .skip CONFIG_FUNCTION_PADDING_BYTES, 0x90; +#define CFI_POST_PADDING +#endif + +#define __CFI_TYPE(name) \ + SYM_START(__cfi_##name, SYM_L_LOCAL, SYM_A_NONE) \ + CFI_PRE_PADDING \ + .byte 0xb8 ASM_NL \ + .long __kcfi_typeid_##name ASM_NL \ + CFI_POST_PADDING \ + SYM_FUNC_END(__cfi_##name) + +/* UML needs to be able to override memcpy() and friends for KASAN. 
*/ +#ifdef CONFIG_UML +# define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS_WEAK +#else +# define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS +#endif + +/* SYM_TYPED_FUNC_START -- use for indirectly called globals, w/ CFI type */ +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) \ + ENDBR + +/* SYM_FUNC_START -- use for global functions */ +#define SYM_FUNC_START(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) + +/* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */ +#define SYM_FUNC_START_NOALIGN(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) + +/* SYM_FUNC_START_LOCAL -- use for local functions */ +#define SYM_FUNC_START_LOCAL(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_F_ALIGN) + +/* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */ +#define SYM_FUNC_START_LOCAL_NOALIGN(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) + +/* SYM_FUNC_START_WEAK -- use for weak functions */ +#define SYM_FUNC_START_WEAK(name) \ + SYM_START(name, SYM_L_WEAK, SYM_F_ALIGN) + +/* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */ +#define SYM_FUNC_START_WEAK_NOALIGN(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_NONE) + +/* + * Expose 'sym' to the startup code in arch/x86/boot/startup/, by emitting an + * alias prefixed with __pi_ + */ +#ifdef __ASSEMBLER__ +#define SYM_PIC_ALIAS(sym) SYM_ALIAS(__pi_ ## sym, sym, SYM_L_GLOBAL) +#else +#define SYM_PIC_ALIAS(sym) extern typeof(sym) __PASTE(__pi_, sym) __alias(sym) +#endif #endif /* _ASM_X86_LINKAGE_H */ diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h deleted file mode 100644 index ed80003ce3e2..000000000000 --- a/arch/x86/include/asm/livepatch.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * livepatch.h - x86-specific Kernel Live Patching Core - * - * Copyright (C) 2014 Seth Jennings <sjenning@redhat.com> - * Copyright (C) 2014 SUSE - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. 
- */ - -#ifndef _ASM_X86_LIVEPATCH_H -#define _ASM_X86_LIVEPATCH_H - -#include <asm/setup.h> -#include <linux/ftrace.h> - -static inline int klp_check_compiler_support(void) -{ -#ifndef CC_USING_FENTRY - return 1; -#endif - return 0; -} - -static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) -{ - regs->ip = ip; -} - -#endif /* _ASM_X86_LIVEPATCH_H */ diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 349a47acaa4a..59aa966dc212 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -120,34 +120,54 @@ static inline long local_sub_return(long i, local_t *l) #define local_inc_return(l) (local_add_return(1, l)) #define local_dec_return(l) (local_sub_return(1, l)) -#define local_cmpxchg(l, o, n) \ - (cmpxchg_local(&((l)->a.counter), (o), (n))) -/* Always has a lock prefix */ -#define local_xchg(l, n) (xchg(&((l)->a.counter), (n))) +static inline long local_cmpxchg(local_t *l, long old, long new) +{ + return cmpxchg_local(&l->a.counter, old, new); +} + +static inline bool local_try_cmpxchg(local_t *l, long *old, long new) +{ + return try_cmpxchg_local(&l->a.counter, + (typeof(l->a.counter) *) old, new); +} + +/* + * Implement local_xchg using CMPXCHG instruction without the LOCK prefix. + * XCHG is expensive due to the implied LOCK prefix. The processor + * cannot prefetch cachelines if XCHG is used. + */ +static __always_inline long +local_xchg(local_t *l, long n) +{ + long c = local_read(l); + + do { } while (!local_try_cmpxchg(l, &c, n)); + + return c; +} /** - * local_add_unless - add unless the number is a given value + * local_add_unless - add unless the number is already a given value * @l: pointer of type local_t * @a: the amount to add to l... * @u: ...unless l is equal to u. * - * Atomically adds @a to @l, so long as it was not @u. - * Returns non-zero if @l was not @u, and zero otherwise. + * Atomically adds @a to @l, if @v was not already @u. + * Returns true if the addition was done. */ -#define local_add_unless(l, a, u) \ -({ \ - long c, old; \ - c = local_read((l)); \ - for (;;) { \ - if (unlikely(c == (u))) \ - break; \ - old = local_cmpxchg((l), c, c + (a)); \ - if (likely(old == c)) \ - break; \ - c = old; \ - } \ - c != (u); \ -}) +static __always_inline bool +local_add_unless(local_t *l, long a, long u) +{ + long c = local_read(l); + + do { + if (unlikely(c == u)) + return false; + } while (!local_try_cmpxchg(l, &c, c + a)); + + return true; +} + #define local_inc_not_zero(l) local_add_unless((l), 1, 0) /* On x86_32, these are no better than the atomic variants. 
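[Editor's note] The rewritten local_xchg() and local_add_unless() in the local.h hunk above share one idiom: read the current value once, then retry a non-LOCK-prefixed compare-and-exchange until it succeeds, with each failed attempt refreshing the cached value. A minimal userspace sketch of that loop shape, using C11 atomics in place of the kernel's local_try_cmpxchg() (the names xchg_via_cmpxchg and add_unless are illustrative, not kernel APIs):

#include <stdatomic.h>
#include <stdbool.h>
#include <assert.h>

/* Analogue of the new local_xchg(): loop on a compare-exchange instead of
 * issuing XCHG, whose implied LOCK prefix the kernel comment above calls
 * out as expensive. On failure, atomic_compare_exchange_weak() writes the
 * current value back into 'c', just as local_try_cmpxchg() updates the
 * value behind its 'old' pointer. */
long xchg_via_cmpxchg(_Atomic long *l, long n)
{
	long c = atomic_load_explicit(l, memory_order_relaxed);

	while (!atomic_compare_exchange_weak(l, &c, n))
		;
	return c;
}

/* Same loop shape as the rewritten local_add_unless(). */
bool add_unless(_Atomic long *l, long a, long u)
{
	long c = atomic_load_explicit(l, memory_order_relaxed);

	do {
		if (c == u)
			return false;
	} while (!atomic_compare_exchange_weak(l, &c, c + a));

	return true;
}

int main(void)
{
	_Atomic long v = 5;

	assert(xchg_via_cmpxchg(&v, 7) == 5);	/* v is now 7 */
	assert(!add_unless(&v, 1, 7));		/* refused: v == u */
	assert(add_unless(&v, 1, 9) && v == 8);
	return 0;
}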
diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h deleted file mode 100644 index 36c93b5cc239..000000000000 --- a/arch/x86/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/local64.h> diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h index 97198001e567..6115bb3d5795 100644 --- a/arch/x86/include/asm/mc146818rtc.h +++ b/arch/x86/include/asm/mc146818rtc.h @@ -95,7 +95,7 @@ static inline unsigned char current_lock_cmos_reg(void) unsigned char rtc_cmos_read(unsigned char addr); void rtc_cmos_write(unsigned char val, unsigned char addr); -extern int mach_set_rtc_mmss(const struct timespec64 *now); +extern int mach_set_cmos_time(const struct timespec64 *now); extern void mach_get_cmos_time(struct timespec64 *now); #define RTC_IRQ 8 diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index c1a812bd5a27..2d98886de09a 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -13,6 +13,7 @@ #define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */ #define MCG_EXT_P BIT_ULL(9) /* Extended registers available */ #define MCG_CMCI_P BIT_ULL(10) /* CMCI supported */ +#define MCG_SEAM_NR BIT_ULL(12) /* MCG_STATUS_SEAM_NR supported */ #define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ #define MCG_EXT_CNT_SHIFT 16 #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) @@ -25,6 +26,7 @@ #define MCG_STATUS_EIPV BIT_ULL(1) /* ip points to correct instruction */ #define MCG_STATUS_MCIP BIT_ULL(2) /* machine check in progress */ #define MCG_STATUS_LMCES BIT_ULL(3) /* LMCE signaled */ +#define MCG_STATUS_SEAM_NR BIT_ULL(12) /* Machine check inside SEAM non-root mode */ /* MCG_EXT_CTL register defines */ #define MCG_EXT_CTL_LMCE_EN BIT_ULL(0) /* Enable LMCE */ @@ -42,12 +44,15 @@ #define MCI_STATUS_CEC_SHIFT 38 /* Corrected Error Count */ #define MCI_STATUS_CEC_MASK GENMASK_ULL(52,38) #define MCI_STATUS_CEC(c) (((c) & MCI_STATUS_CEC_MASK) >> MCI_STATUS_CEC_SHIFT) +#define MCI_STATUS_MSCOD(m) (((m) >> 16) & 0xffff) /* AMD-specific bits */ #define MCI_STATUS_TCC BIT_ULL(55) /* Task context corrupt */ +#define MCI_STATUS_PADDRV BIT_ULL(54) /* Valid System Physical Address */ #define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. valid */ #define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */ #define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */ +#define MCI_STATUS_SCRUB BIT_ULL(40) /* Error detected during scrub operation */ /* * McaX field if set indicates a given bank supports MCA extensions: @@ -57,6 +62,8 @@ * - TCC bit is present in MCx_STATUS. 
*/ #define MCI_CONFIG_MCAX 0x1 +#define MCI_CONFIG_FRUTEXT BIT_ULL(9) +#define MCI_CONFIG_PADDRV BIT_ULL(11) #define MCI_IPID_MCATYPE 0xFFFF0000 #define MCI_IPID_HWID 0xFFF @@ -86,6 +93,9 @@ #define MCI_MISC_ADDR_MEM 3 /* memory address */ #define MCI_MISC_ADDR_GENERIC 7 /* generic */ +/* MCi_ADDR register defines */ +#define MCI_ADDR_PHYSADDR GENMASK_ULL(boot_cpu_data.x86_phys_bits - 1, 0) + /* CTL2 register defines */ #define MCI_CTL2_CMCI_EN BIT_ULL(30) #define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL @@ -101,7 +111,7 @@ #define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ -#define MCE_LOG_LEN 32 +#define MCE_LOG_MIN_LEN 32U #define MCE_LOG_SIGNATURE "MACHINECHECK" /* AMD Scalable MCA */ @@ -115,6 +125,9 @@ #define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008 #define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009 #define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a +/* Registers MISC2 to MISC4 are at offsets B to D. */ +#define MSR_AMD64_SMCA_MC0_SYND1 0xc000200e +#define MSR_AMD64_SMCA_MC0_SYND2 0xc000200f #define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_ADDR(x) (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x)) @@ -125,6 +138,40 @@ #define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x))) +#define MSR_AMD64_SMCA_MCx_SYND1(x) (MSR_AMD64_SMCA_MC0_SYND1 + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_SYND2(x) (MSR_AMD64_SMCA_MC0_SYND2 + 0x10*(x)) + +#define XEC(x, mask) (((x) >> 16) & mask) + +/* mce.kflags flag bits for logging etc. */ +#define MCE_HANDLED_CEC BIT_ULL(0) +#define MCE_HANDLED_UC BIT_ULL(1) +#define MCE_HANDLED_EXTLOG BIT_ULL(2) +#define MCE_HANDLED_NFIT BIT_ULL(3) +#define MCE_HANDLED_EDAC BIT_ULL(4) +#define MCE_HANDLED_MCELOG BIT_ULL(5) + +/* + * Indicates an MCE which has happened in kernel space but from + * which the kernel can recover simply by executing fixup_exception() + * so that an error is returned to the caller of the function that + * hit the machine check. + */ +#define MCE_IN_KERNEL_RECOV BIT_ULL(6) + +/* + * Indicates an MCE that happened in kernel space while copying data + * from user. In this case fixup_exception() gets the kernel to the + * error exit for the copy function. Machine check handler can then + * treat it like a fault taken in user mode. + */ +#define MCE_IN_KERNEL_COPYIN BIT_ULL(7) + +/* + * Indicates that handler should check and clear Deferred error registers + * rather than common ones. + */ +#define MCE_CHECK_DFR_REGS BIT_ULL(8) /* * This structure contains all data related to the MCE log. Also @@ -134,23 +181,52 @@ */ struct mce_log_buffer { char signature[12]; /* "MACHINECHECK" */ - unsigned len; /* = MCE_LOG_LEN */ + unsigned len; /* = elements in .mce_entry[] */ unsigned next; unsigned flags; unsigned recordlen; /* length of struct mce */ - struct mce entry[MCE_LOG_LEN]; + struct mce entry[]; }; +/* Highest last */ enum mce_notifier_prios { - MCE_PRIO_FIRST = INT_MAX, - MCE_PRIO_SRAO = INT_MAX - 1, - MCE_PRIO_EXTLOG = INT_MAX - 2, - MCE_PRIO_NFIT = INT_MAX - 3, - MCE_PRIO_EDAC = INT_MAX - 4, - MCE_PRIO_MCELOG = 1, - MCE_PRIO_LOWEST = 0, + MCE_PRIO_LOWEST, + MCE_PRIO_MCELOG, + MCE_PRIO_EDAC, + MCE_PRIO_NFIT, + MCE_PRIO_EXTLOG, + MCE_PRIO_UC, + MCE_PRIO_EARLY, + MCE_PRIO_CEC, + MCE_PRIO_HIGHEST = MCE_PRIO_CEC +}; + +/** + * struct mce_hw_err - Hardware Error Record. 
+ * @m: Machine Check record. + * @vendor: Vendor-specific error information. + * + * Vendor-specific fields should not be added to struct mce. Instead, vendors + * should export their vendor-specific data through their structure in the + * vendor union below. + * + * AMD's vendor data is parsed by error decoding tools for supplemental error + * information. Thus, current offsets of existing fields must be maintained. + * Only add new fields at the end of AMD's vendor structure. + */ +struct mce_hw_err { + struct mce m; + + union vendor_info { + struct { + u64 synd1; /* MCA_SYND1 MSR */ + u64 synd2; /* MCA_SYND2 MSR */ + } amd; + } vendor; }; +#define to_mce_hw_err(mce) container_of(mce, struct mce_hw_err, m) + struct notifier_block; extern void mce_register_decode_chain(struct notifier_block *nb); extern void mce_unregister_decode_chain(struct notifier_block *nb); @@ -160,38 +236,39 @@ extern void mce_unregister_decode_chain(struct notifier_block *nb); extern int mce_p5_enabled; +#ifdef CONFIG_ARCH_HAS_COPY_MC +extern void enable_copy_mc_fragile(void); +unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt); +#else +static inline void enable_copy_mc_fragile(void) +{ +} +#endif + +struct cper_ia_proc_ctx; + #ifdef CONFIG_X86_MCE int mcheck_init(void); +void mca_bsp_init(struct cpuinfo_x86 *c); void mcheck_cpu_init(struct cpuinfo_x86 *c); void mcheck_cpu_clear(struct cpuinfo_x86 *c); -void mcheck_vendor_init_severity(void); +int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id); #else static inline int mcheck_init(void) { return 0; } +static inline void mca_bsp_init(struct cpuinfo_x86 *c) {} static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {} -static inline void mcheck_vendor_init_severity(void) {} -#endif - -#ifdef CONFIG_X86_ANCIENT_MCE -void intel_p5_mcheck_init(struct cpuinfo_x86 *c); -void winchip_mcheck_init(struct cpuinfo_x86 *c); -static inline void enable_p5_mce(void) { mce_p5_enabled = 1; } -#else -static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} -static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} -static inline void enable_p5_mce(void) {} +static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id) { return -EINVAL; } #endif -void mce_setup(struct mce *m); -void mce_log(struct mce *m); +void mce_prep_record(struct mce_hw_err *err); +void mce_log(struct mce_hw_err *err); DECLARE_PER_CPU(struct device *, mce_device); -/* - * Maximum banks number. - * This is the limit of the current register layout on - * Intel CPUs. - */ -#define MAX_NR_BANKS 32 +/* Maximum number of MCA banks per CPU. 
*/ +#define MAX_NR_BANKS 64 #ifdef CONFIG_X86_MCE_INTEL void mce_intel_feature_init(struct cpuinfo_x86 *c); @@ -209,20 +286,10 @@ static inline void cmci_rediscover(void) {} static inline void cmci_recheck(void) {} #endif -#ifdef CONFIG_X86_MCE_AMD -void mce_amd_feature_init(struct cpuinfo_x86 *c); -int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr); -#else -static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } -static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; }; -#endif - -static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } - -int mce_available(struct cpuinfo_x86 *c); +bool mce_available(struct cpuinfo_x86 *c); bool mce_is_memory_error(struct mce *m); bool mce_is_correctable(struct mce *m); -int mce_usable_address(struct mce *m); +bool mce_usable_address(struct mce *m); DECLARE_PER_CPU(unsigned, mce_exception_count); DECLARE_PER_CPU(unsigned, mce_poll_count); @@ -233,24 +300,26 @@ DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); enum mcp_flags { MCP_TIMESTAMP = BIT(0), /* log time stamp */ MCP_UC = BIT(1), /* log uncorrected errors */ - MCP_DONTLOG = BIT(2), /* only clear, don't log */ + MCP_QUEUE_LOG = BIT(2), /* only queue to genpool */ }; -bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b); -int mce_notify_irq(void); +void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); DECLARE_PER_CPU(struct mce, injectm); /* Disable CMCI/polling for MCA bank claimed by firmware */ extern void mce_disable_bank(int bank); +#ifdef CONFIG_X86_MCE_THRESHOLD +void mce_save_apei_thr_limit(u32 thr_limit); +#else +static inline void mce_save_apei_thr_limit(u32 thr_limit) { } +#endif /* CONFIG_X86_MCE_THRESHOLD */ + /* * Exception handler */ - -/* Call the installed machine check handler for this CPU setup. */ -extern void (*machine_check_vector)(struct pt_regs *, long error_code); -void do_machine_check(struct pt_regs *, long); +void do_machine_check(struct pt_regs *pt_regs); /* * Threshold handler @@ -261,28 +330,6 @@ extern void (*mce_threshold_vector)(void); extern void (*deferred_error_int_vector)(void); /* - * Thermal handler - */ - -void intel_init_thermal(struct cpuinfo_x86 *c); - -/* Interrupt Handler for core thermal thresholds */ -extern int (*platform_thermal_notify)(__u64 msr_val); - -/* Interrupt Handler for package thermal thresholds */ -extern int (*platform_thermal_package_notify)(__u64 msr_val); - -/* Callback support of rate control, return true, if - * callback has rate control */ -extern bool (*platform_thermal_package_rate_control)(void); - -#ifdef CONFIG_X86_THERMAL_VECTOR -extern void mcheck_intel_therm_init(void); -#else -static inline void mcheck_intel_therm_init(void) { } -#endif - -/* * Used by APEI to report memory error via /dev/mcelog */ @@ -299,6 +346,7 @@ extern void apei_mce_report_mem_error(int corrected, /* These may be used by multiple smca_hwid_mcatypes */ enum smca_bank_types { SMCA_LS = 0, /* Load Store */ + SMCA_LS_V2, SMCA_IF, /* Instruction Fetch */ SMCA_L2_CACHE, /* L2 Cache */ SMCA_DE, /* Decoder Unit */ @@ -307,43 +355,44 @@ enum smca_bank_types { SMCA_FP, /* Floating Point */ SMCA_L3_CACHE, /* L3 Cache */ SMCA_CS, /* Coherent Slave */ + SMCA_CS_V2, SMCA_PIE, /* Power, Interrupts, etc. 
*/ SMCA_UMC, /* Unified Memory Controller */ + SMCA_UMC_V2, + SMCA_MA_LLC, /* Memory Attached Last Level Cache */ SMCA_PB, /* Parameter Block */ SMCA_PSP, /* Platform Security Processor */ + SMCA_PSP_V2, SMCA_SMU, /* System Management Unit */ + SMCA_SMU_V2, + SMCA_MP5, /* Microprocessor 5 Unit */ + SMCA_MPDMA, /* MPDMA Unit */ + SMCA_NBIO, /* Northbridge IO Unit */ + SMCA_PCIE, /* PCI Express Unit */ + SMCA_PCIE_V2, + SMCA_XGMI_PCS, /* xGMI PCS Unit */ + SMCA_NBIF, /* NBIF Unit */ + SMCA_SHUB, /* System HUB Unit */ + SMCA_SATA, /* SATA Unit */ + SMCA_USB, /* USB Unit */ + SMCA_USR_DP, /* Ultra Short Reach Data Plane Controller */ + SMCA_USR_CP, /* Ultra Short Reach Control Plane Controller */ + SMCA_GMI_PCS, /* GMI PCS Unit */ + SMCA_XGMI_PHY, /* xGMI PHY Unit */ + SMCA_WAFL_PHY, /* WAFL PHY Unit */ + SMCA_GMI_PHY, /* GMI PHY Unit */ N_SMCA_BANK_TYPES }; -#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype)) - -struct smca_hwid { - unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */ - u32 hwid_mcatype; /* (hwid,mcatype) tuple */ - u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */ - u8 count; /* Number of instances. */ -}; - -struct smca_bank { - struct smca_hwid *hwid; - u32 id; /* Value of MCA_IPID[InstanceId]. */ - u8 sysfs_id; /* Value used for sysfs name. */ -}; - -extern struct smca_bank smca_banks[MAX_NR_BANKS]; - -extern const char *smca_get_long_name(enum smca_bank_types t); extern bool amd_mce_is_memory_error(struct mce *m); -extern int mce_threshold_create_device(unsigned int cpu); -extern int mce_threshold_remove_device(unsigned int cpu); - +void mce_amd_feature_init(struct cpuinfo_x86 *c); +enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank); #else - -static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; -static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; }; -static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; - +static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; +static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } #endif +unsigned long copy_mc_fragile_handle_tail(char *to, char *from, unsigned len); + #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/mcsafe_test.h b/arch/x86/include/asm/mcsafe_test.h deleted file mode 100644 index eb59804b6201..000000000000 --- a/arch/x86/include/asm/mcsafe_test.h +++ /dev/null @@ -1,75 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _MCSAFE_TEST_H_ -#define _MCSAFE_TEST_H_ - -#ifndef __ASSEMBLY__ -#ifdef CONFIG_MCSAFE_TEST -extern unsigned long mcsafe_test_src; -extern unsigned long mcsafe_test_dst; - -static inline void mcsafe_inject_src(void *addr) -{ - if (addr) - mcsafe_test_src = (unsigned long) addr; - else - mcsafe_test_src = ~0UL; -} - -static inline void mcsafe_inject_dst(void *addr) -{ - if (addr) - mcsafe_test_dst = (unsigned long) addr; - else - mcsafe_test_dst = ~0UL; -} -#else /* CONFIG_MCSAFE_TEST */ -static inline void mcsafe_inject_src(void *addr) -{ -} - -static inline void mcsafe_inject_dst(void *addr) -{ -} -#endif /* CONFIG_MCSAFE_TEST */ - -#else /* __ASSEMBLY__ */ -#include <asm/export.h> - -#ifdef CONFIG_MCSAFE_TEST -.macro MCSAFE_TEST_CTL - .pushsection .data - .align 8 - .globl mcsafe_test_src - mcsafe_test_src: - .quad 0 - EXPORT_SYMBOL_GPL(mcsafe_test_src) - .globl mcsafe_test_dst - mcsafe_test_dst: - .quad 0 - EXPORT_SYMBOL_GPL(mcsafe_test_dst) - .popsection -.endm - -.macro MCSAFE_TEST_SRC reg count 
target - leaq \count(\reg), %r9 - cmp mcsafe_test_src, %r9 - ja \target -.endm - -.macro MCSAFE_TEST_DST reg count target - leaq \count(\reg), %r9 - cmp mcsafe_test_dst, %r9 - ja \target -.endm -#else -.macro MCSAFE_TEST_CTL -.endm - -.macro MCSAFE_TEST_SRC reg count target -.endm - -.macro MCSAFE_TEST_DST reg count target -.endm -#endif /* CONFIG_MCSAFE_TEST */ -#endif /* __ASSEMBLY__ */ -#endif /* _MCSAFE_TEST_H_ */ diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 616f8e637bc3..ea6494628cb0 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -1,28 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * AMD Memory Encryption Support * * Copyright (C) 2016 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __X86_MEM_ENCRYPT_H__ #define __X86_MEM_ENCRYPT_H__ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/init.h> +#include <linux/cc_platform.h> + +#include <asm/asm.h> +struct boot_params; -#include <asm/bootparam.h> +#ifdef CONFIG_X86_MEM_ENCRYPT +void __init mem_encrypt_init(void); +void __init mem_encrypt_setup_arch(void); +#else +static inline void mem_encrypt_init(void) { } +static inline void __init mem_encrypt_setup_arch(void) { } +#endif #ifdef CONFIG_AMD_MEM_ENCRYPT extern u64 sme_me_mask; -extern bool sev_enabled; +extern u64 sev_status; void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr, unsigned long decrypted_kernel_vaddr, @@ -40,24 +47,29 @@ void __init sme_unmap_bootdata(char *real_mode_data); void __init sme_early_init(void); -void __init sme_encrypt_kernel(struct boot_params *bp); -void __init sme_enable(struct boot_params *bp); +void sme_encrypt_kernel(struct boot_params *bp); +void sme_enable(struct boot_params *bp); int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); +void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, + unsigned long size, bool enc); -/* Architecture __weak replacement functions */ -void __init mem_encrypt_init(void); void __init mem_encrypt_free_decrypted_mem(void); -bool sme_active(void); -bool sev_active(void); +void __init sev_es_init_vc_handling(void); -#define __bss_decrypted __attribute__((__section__(".bss..decrypted"))) +static inline u64 sme_get_me_mask(void) +{ + return sme_me_mask; +} + +#define __bss_decrypted __section(".bss..decrypted") #else /* !CONFIG_AMD_MEM_ENCRYPT */ #define sme_me_mask 0ULL +#define sev_status 0ULL static inline void __init sme_early_encrypt(resource_size_t paddr, unsigned long size) { } @@ -69,21 +81,28 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { } static inline void __init sme_early_init(void) { } -static inline void __init sme_encrypt_kernel(struct boot_params *bp) { } -static inline void __init sme_enable(struct boot_params *bp) { } +static inline void sme_encrypt_kernel(struct boot_params *bp) { } +static inline void sme_enable(struct boot_params *bp) { } -static inline bool sme_active(void) { return false; } -static inline bool sev_active(void) { return false; } +static inline void sev_es_init_vc_handling(void) { } static inline int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; } static 
inline int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } +static inline void __init +early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc) {} + +static inline void mem_encrypt_free_decrypted_mem(void) { } + +static inline u64 sme_get_me_mask(void) { return 0; } #define __bss_decrypted #endif /* CONFIG_AMD_MEM_ENCRYPT */ +void add_encrypt_protection_map(void); + /* * The __sme_pa() and __sme_pa_nodebug() macros are meant for use when * writing to or comparing values from the cr3 register. Having the @@ -95,6 +114,6 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[]; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __X86_MEM_ENCRYPT_H__ */ diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h new file mode 100644 index 000000000000..113b2fa51849 --- /dev/null +++ b/arch/x86/include/asm/memtype.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MEMTYPE_H +#define _ASM_X86_MEMTYPE_H + +#include <linux/types.h> +#include <asm/pgtable_types.h> + +extern bool pat_enabled(void); +extern void pat_bp_init(void); +extern void pat_cpu_init(void); + +extern int memtype_reserve(u64 start, u64 end, + enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); +extern int memtype_free(u64 start, u64 end); + +extern int memtype_kernel_map_sync(u64 base, unsigned long size, + enum page_cache_mode pcm); + +extern int memtype_reserve_io(resource_size_t start, resource_size_t end, + enum page_cache_mode *pcm); + +extern void memtype_free_io(resource_size_t start, resource_size_t end); + +extern bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); + +bool x86_has_pat_wp(void); +enum page_cache_mode pgprot2cachemode(pgprot_t pgprot); + +#endif /* _ASM_X86_MEMTYPE_H */ diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 2b7cc5397f80..8b41f26f003b 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -2,18 +2,7 @@ #ifndef _ASM_X86_MICROCODE_H #define _ASM_X86_MICROCODE_H -#include <asm/cpu.h> -#include <linux/earlycpio.h> -#include <linux/initrd.h> - -struct ucode_patch { - struct list_head plist; - void *data; /* Intel uses only this one */ - u32 patch_id; - u16 equiv_cpu; -}; - -extern struct list_head microcode_cache; +#include <asm/msr.h> struct cpu_signature { unsigned int sig; @@ -21,125 +10,84 @@ struct cpu_signature { unsigned int rev; }; -struct device; - -enum ucode_state { - UCODE_OK = 0, - UCODE_NEW, - UCODE_UPDATED, - UCODE_NFOUND, - UCODE_ERROR, +struct ucode_cpu_info { + struct cpu_signature cpu_sig; + void *mc; }; -struct microcode_ops { - enum ucode_state (*request_microcode_user) (int cpu, - const void __user *buf, size_t size); - - enum ucode_state (*request_microcode_fw) (int cpu, struct device *, - bool refresh_fw); - - void (*microcode_fini_cpu) (int cpu); +#ifdef CONFIG_MICROCODE +void load_ucode_bsp(void); +void load_ucode_ap(void); +void microcode_bsp_resume(void); +bool __init microcode_loader_disabled(void); +#else +static inline void load_ucode_bsp(void) { } +static inline void load_ucode_ap(void) { } +static inline void microcode_bsp_resume(void) { } +static inline bool __init microcode_loader_disabled(void) { return false; } +#endif - /* - * The generic 'microcode_core' part guarantees that - * the callbacks below run on a target cpu when they - * are being 
called. - * See also the "Synchronization" section in microcode_core.c. - */ - enum ucode_state (*apply_microcode) (int cpu); - int (*collect_cpu_info) (int cpu, struct cpu_signature *csig); +extern unsigned long initrd_start_early; + +#ifdef CONFIG_CPU_SUP_INTEL +/* Intel specific microcode defines. Public for IFS */ +struct microcode_header_intel { + unsigned int hdrver; + unsigned int rev; + unsigned int date; + unsigned int sig; + unsigned int cksum; + unsigned int ldrver; + unsigned int pf; + unsigned int datasize; + unsigned int totalsize; + unsigned int metasize; + unsigned int min_req_ver; + unsigned int reserved; }; -struct ucode_cpu_info { - struct cpu_signature cpu_sig; - int valid; - void *mc; +struct microcode_intel { + struct microcode_header_intel hdr; + unsigned int bits[]; }; -extern struct ucode_cpu_info ucode_cpu_info[]; -struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa); -#ifdef CONFIG_MICROCODE_INTEL -extern struct microcode_ops * __init init_intel_microcode(void); -#else -static inline struct microcode_ops * __init init_intel_microcode(void) -{ - return NULL; -} -#endif /* CONFIG_MICROCODE_INTEL */ +#define DEFAULT_UCODE_DATASIZE (2000) +#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) +#define MC_HEADER_TYPE_MICROCODE 1 +#define MC_HEADER_TYPE_IFS 2 -#ifdef CONFIG_MICROCODE_AMD -extern struct microcode_ops * __init init_amd_microcode(void); -extern void __exit exit_amd_microcode(void); -#else -static inline struct microcode_ops * __init init_amd_microcode(void) +static inline int intel_microcode_get_datasize(struct microcode_header_intel *hdr) { - return NULL; + return hdr->datasize ? : DEFAULT_UCODE_DATASIZE; } -static inline void __exit exit_amd_microcode(void) {} -#endif -#define MAX_UCODE_COUNT 128 - -#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24)) -#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u') -#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I') -#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l') -#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h') -#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i') -#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D') - -#define CPUID_IS(a, b, c, ebx, ecx, edx) \ - (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c)))) - -/* - * In early loading microcode phase on BSP, boot_cpu_data is not set up yet. - * x86_cpuid_vendor() gets vendor id for BSP. - * - * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify - * coding, we still use x86_cpuid_vendor() to get vendor id for AP. - * - * x86_cpuid_vendor() gets vendor information directly from CPUID. 
- */ -static inline int x86_cpuid_vendor(void) +static inline u32 intel_get_microcode_revision(void) { - u32 eax = 0x00000000; - u32 ebx, ecx = 0, edx; + u32 rev, dummy; - native_cpuid(&eax, &ebx, &ecx, &edx); + native_wrmsrq(MSR_IA32_UCODE_REV, 0); - if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx)) - return X86_VENDOR_INTEL; + /* As documented in the SDM: Do a CPUID 1 here */ + native_cpuid_eax(1); - if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx)) - return X86_VENDOR_AMD; + /* get the current revision from MSR 0x8B */ + native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev); - return X86_VENDOR_UNKNOWN; + return rev; } +#endif /* !CONFIG_CPU_SUP_INTEL */ -static inline unsigned int x86_cpuid_family(void) -{ - u32 eax = 0x00000001; - u32 ebx, ecx = 0, edx; - - native_cpuid(&eax, &ebx, &ecx, &edx); +bool microcode_nmi_handler(void); +void microcode_offline_nmi_handler(void); - return x86_family(eax); +#ifdef CONFIG_MICROCODE_LATE_LOADING +DECLARE_STATIC_KEY_FALSE(microcode_nmi_handler_enable); +static __always_inline bool microcode_nmi_handler_enabled(void) +{ + return static_branch_unlikely(&microcode_nmi_handler_enable); } - -#ifdef CONFIG_MICROCODE -int __init microcode_init(void); -extern void __init load_ucode_bsp(void); -extern void load_ucode_ap(void); -void reload_early_microcode(void); -extern bool get_builtin_firmware(struct cpio_data *cd, const char *name); -extern bool initrd_gone; #else -static inline int __init microcode_init(void) { return 0; }; -static inline void __init load_ucode_bsp(void) { } -static inline void load_ucode_ap(void) { } -static inline void reload_early_microcode(void) { } -static inline bool -get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; } +static __always_inline bool microcode_nmi_handler_enabled(void) { return false; } #endif #endif /* _ASM_X86_MICROCODE_H */ diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h deleted file mode 100644 index 209492849566..000000000000 --- a/arch/x86/include/asm/microcode_amd.h +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_MICROCODE_AMD_H -#define _ASM_X86_MICROCODE_AMD_H - -#include <asm/microcode.h> - -#define UCODE_MAGIC 0x00414d44 -#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 -#define UCODE_UCODE_TYPE 0x00000001 - -#define SECTION_HDR_SIZE 8 -#define CONTAINER_HDR_SZ 12 - -struct equiv_cpu_entry { - u32 installed_cpu; - u32 fixed_errata_mask; - u32 fixed_errata_compare; - u16 equiv_cpu; - u16 res; -} __attribute__((packed)); - -struct microcode_header_amd { - u32 data_code; - u32 patch_id; - u16 mc_patch_data_id; - u8 mc_patch_data_len; - u8 init_flag; - u32 mc_patch_data_checksum; - u32 nb_dev_id; - u32 sb_dev_id; - u16 processor_rev_id; - u8 nb_rev_id; - u8 sb_rev_id; - u8 bios_api_rev; - u8 reserved1[3]; - u32 match_reg[8]; -} __attribute__((packed)); - -struct microcode_amd { - struct microcode_header_amd hdr; - unsigned int mpb[0]; -}; - -#define PATCH_MAX_SIZE PAGE_SIZE - -#ifdef CONFIG_MICROCODE_AMD -extern void __init load_ucode_amd_bsp(unsigned int family); -extern void load_ucode_amd_ap(unsigned int family); -extern int __init save_microcode_in_initrd_amd(unsigned int family); -void reload_ucode_amd(void); -#else -static inline void __init load_ucode_amd_bsp(unsigned int family) {} -static inline void load_ucode_amd_ap(unsigned int family) {} -static inline int __init -save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } -void reload_ucode_amd(void) {}
-#endif -#endif /* _ASM_X86_MICROCODE_AMD_H */ diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h deleted file mode 100644 index d85a07d7154f..000000000000 --- a/arch/x86/include/asm/microcode_intel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_MICROCODE_INTEL_H -#define _ASM_X86_MICROCODE_INTEL_H - -#include <asm/microcode.h> - -struct microcode_header_intel { - unsigned int hdrver; - unsigned int rev; - unsigned int date; - unsigned int sig; - unsigned int cksum; - unsigned int ldrver; - unsigned int pf; - unsigned int datasize; - unsigned int totalsize; - unsigned int reserved[3]; -}; - -struct microcode_intel { - struct microcode_header_intel hdr; - unsigned int bits[0]; -}; - -/* microcode format is extended from prescott processors */ -struct extended_signature { - unsigned int sig; - unsigned int pf; - unsigned int cksum; -}; - -struct extended_sigtable { - unsigned int count; - unsigned int cksum; - unsigned int reserved[3]; - struct extended_signature sigs[0]; -}; - -#define DEFAULT_UCODE_DATASIZE (2000) -#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) -#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) -#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) -#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) - -#define get_totalsize(mc) \ - (((struct microcode_intel *)mc)->hdr.datasize ? \ - ((struct microcode_intel *)mc)->hdr.totalsize : \ - DEFAULT_UCODE_TOTALSIZE) - -#define get_datasize(mc) \ - (((struct microcode_intel *)mc)->hdr.datasize ? \ - ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) - -#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) - -static inline u32 intel_get_microcode_revision(void) -{ - u32 rev, dummy; - - native_wrmsrl(MSR_IA32_UCODE_REV, 0); - - /* As documented in the SDM: Do a CPUID 1 here */ - native_cpuid_eax(1); - - /* get the current revision from MSR 0x8B */ - native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev); - - return rev; -} - -#ifdef CONFIG_MICROCODE_INTEL -extern void __init load_ucode_intel_bsp(void); -extern void load_ucode_intel_ap(void); -extern void show_ucode_info_early(void); -extern int __init save_microcode_in_initrd_intel(void); -void reload_ucode_intel(void); -#else -static inline __init void load_ucode_intel_bsp(void) {} -static inline void load_ucode_intel_ap(void) {} -static inline void show_ucode_info_early(void) {} -static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL; } -static inline void reload_ucode_intel(void) {} -#endif - -#endif /* _ASM_X86_MICROCODE_INTEL_H */ diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h new file mode 100644 index 000000000000..12b820259b9f --- /dev/null +++ b/arch/x86/include/asm/mman.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MMAN_H__ +#define __ASM_MMAN_H__ + +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +#define arch_calc_vm_prot_bits(prot, key) ( \ + ((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \ + ((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \ + ((key) & 0x4 ? VM_PKEY_BIT2 : 0) | \ + ((key) & 0x8 ? 
VM_PKEY_BIT3 : 0)) +#endif + +#include <uapi/asm/mman.h> + +#endif /* __ASM_MMAN_H__ */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 5ff3e8af2c20..0fe9c569d171 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -6,6 +6,18 @@ #include <linux/rwsem.h> #include <linux/mutex.h> #include <linux/atomic.h> +#include <linux/bits.h> + +/* Uprobes on this MM assume 32-bit code */ +#define MM_CONTEXT_UPROBE_IA32 0 +/* vsyscall page is accessible on this MM */ +#define MM_CONTEXT_HAS_VSYSCALL 1 +/* Do not allow changing LAM mode */ +#define MM_CONTEXT_LOCK_LAM 2 +/* Allow LAM and SVA coexisting */ +#define MM_CONTEXT_FORCE_TAGGED_SVA 3 +/* Tracks mm_cpumask */ +#define MM_CONTEXT_NOTRACK 4 /* * x86 has arch-specific MMU state beyond what lives in mm_struct. @@ -27,14 +39,21 @@ typedef struct { */ atomic64_t tlb_gen; + unsigned long next_trim_cpumask; + #ifdef CONFIG_MODIFY_LDT_SYSCALL struct rw_semaphore ldt_usr_sem; struct ldt_struct *ldt; #endif -#ifdef CONFIG_X86_64 - /* True if mm supports a task running in 32 bit compatibility mode. */ - unsigned short ia32_compat; + unsigned long flags; + +#ifdef CONFIG_ADDRESS_MASKING + /* Active LAM mode: X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */ + unsigned long lam_cr3_mask; + + /* Significant bits of the virtual address. Excludes tag bits. */ + u64 untag_mask; #endif struct mutex lock; @@ -45,22 +64,32 @@ typedef struct { #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS /* * One bit per protection key says whether userspace can - * use it or not. protected by mmap_sem. + * use it or not. protected by mmap_lock. */ u16 pkey_allocation_map; s16 execute_only_pkey; #endif -#ifdef CONFIG_X86_INTEL_MPX - /* address of the bounds directory */ - void __user *bd_addr; + +#ifdef CONFIG_BROADCAST_TLB_FLUSH + /* + * The global ASID will be a non-zero value when the process has + * the same ASID across all CPUs, allowing it to make use of + * hardware-assisted remote TLB invalidation like AMD INVLPGB. + */ + u16 global_asid; + + /* The process is transitioning to a new global ASID number. 
*/ + bool asid_transition; #endif } mm_context_t; #define INIT_MM_CONTEXT(mm) \ .context = { \ .ctx_id = 1, \ + .lock = __MUTEX_INITIALIZER(mm.context.lock), \ } -void leave_mm(int cpu); +void leave_mm(void); +#define leave_mm leave_mm #endif /* _ASM_X86_MMU_H */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 0ca50611e8ce..73bf3b1b44e8 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -2,41 +2,24 @@ #ifndef _ASM_X86_MMU_CONTEXT_H #define _ASM_X86_MMU_CONTEXT_H -#include <asm/desc.h> #include <linux/atomic.h> #include <linux/mm_types.h> #include <linux/pkeys.h> #include <trace/events/tlb.h> -#include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/paravirt.h> -#include <asm/mpx.h> +#include <asm/debugreg.h> +#include <asm/gsseg.h> +#include <asm/desc.h> extern atomic64_t last_mm_ctx_id; -#ifndef CONFIG_PARAVIRT_XXL -static inline void paravirt_activate_mm(struct mm_struct *prev, - struct mm_struct *next) -{ -} -#endif /* !CONFIG_PARAVIRT_XXL */ - #ifdef CONFIG_PERF_EVENTS - +DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key); DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key); - -static inline void load_mm_cr4(struct mm_struct *mm) -{ - if (static_branch_unlikely(&rdpmc_always_available_key) || - atomic_read(&mm->context.perf_rdpmc_allowed)) - cr4_set_bits(X86_CR4_PCE); - else - cr4_clear_bits(X86_CR4_PCE); -} -#else -static inline void load_mm_cr4(struct mm_struct *mm) {} +void cr4_update_pce(void *ignored); #endif #ifdef CONFIG_MODIFY_LDT_SYSCALL @@ -66,14 +49,6 @@ struct ldt_struct { int slot; }; -/* This is a multiple of PAGE_SIZE. */ -#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) - -static inline void *ldt_slot_va(int slot) -{ - return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); -} - /* * Used for LDT copy/destruction. */ @@ -96,88 +71,84 @@ static inline void destroy_context_ldt(struct mm_struct *mm) { } static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { } #endif +#ifdef CONFIG_MODIFY_LDT_SYSCALL +extern void load_mm_ldt(struct mm_struct *mm); +extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next); +#else static inline void load_mm_ldt(struct mm_struct *mm) { -#ifdef CONFIG_MODIFY_LDT_SYSCALL - struct ldt_struct *ldt; - - /* READ_ONCE synchronizes with smp_store_release */ - ldt = READ_ONCE(mm->context.ldt); + clear_LDT(); +} +static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) +{ + DEBUG_LOCKS_WARN_ON(preemptible()); +} +#endif +#ifdef CONFIG_ADDRESS_MASKING +static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm) +{ /* - * Any change to mm->context.ldt is followed by an IPI to all - * CPUs with the mm active. The LDT will not be freed until - * after the IPI is handled by all such CPUs. This means that, - * if the ldt_struct changes before we return, the values we see - * will be safe, and the new values will be loaded before we run - * any user code. - * - * NB: don't try to convert this to use RCU without extreme care. - * We would still need IRQs off, because we don't want to change - * the local LDT after an IPI loaded a newer value than the one - * that we can see. + * When switch_mm_irqs_off() is called for a kthread, it may race with + * LAM enablement. switch_mm_irqs_off() uses the LAM mask to do two + * things: populate CR3 and populate 'cpu_tlbstate.lam'. Make sure it + * reads a single value for both. 
*/ + return READ_ONCE(mm->context.lam_cr3_mask); +} - if (unlikely(ldt)) { - if (static_cpu_has(X86_FEATURE_PTI)) { - if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { - /* - * Whoops -- either the new LDT isn't mapped - * (if slot == -1) or is mapped into a bogus - * slot (if slot > 1). - */ - clear_LDT(); - return; - } - - /* - * If page table isolation is enabled, ldt->entries - * will not be mapped in the userspace pagetables. - * Tell the CPU to access the LDT through the alias - * at ldt_slot_va(ldt->slot). - */ - set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); - } else { - set_ldt(ldt->entries, ldt->nr_entries); - } - } else { - clear_LDT(); - } +static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm) +{ + mm->context.lam_cr3_mask = oldmm->context.lam_cr3_mask; + mm->context.untag_mask = oldmm->context.untag_mask; +} + +#define mm_untag_mask mm_untag_mask +static inline unsigned long mm_untag_mask(struct mm_struct *mm) +{ + return mm->context.untag_mask; +} + +static inline void mm_reset_untag_mask(struct mm_struct *mm) +{ + mm->context.untag_mask = -1UL; +} + +#define arch_pgtable_dma_compat arch_pgtable_dma_compat +static inline bool arch_pgtable_dma_compat(struct mm_struct *mm) +{ + return !mm_lam_cr3_mask(mm) || + test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags); +} #else - clear_LDT(); -#endif + +static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm) +{ + return 0; } -static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) +static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm) { -#ifdef CONFIG_MODIFY_LDT_SYSCALL - /* - * Load the LDT if either the old or new mm had an LDT. - * - * An mm will never go from having an LDT to not having an LDT. Two - * mms never share an LDT, so we don't gain anything by checking to - * see whether the LDT changed. There's also no guarantee that - * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL, - * then prev->context.ldt will also be non-NULL. - * - * If we really cared, we could optimize the case where prev == next - * and we're exiting lazy mode. Most of the time, if this happens, - * we don't actually need to reload LDTR, but modify_ldt() is mostly - * used by legacy code and emulators where we don't need this level of - * performance. - * - * This uses | instead of || because it generates better code. - */ - if (unlikely((unsigned long)prev->context.ldt | - (unsigned long)next->context.ldt)) - load_mm_ldt(next); -#endif +} - DEBUG_LOCKS_WARN_ON(preemptible()); +static inline void mm_reset_untag_mask(struct mm_struct *mm) +{ } +#endif + +#define enter_lazy_tlb enter_lazy_tlb +extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); -void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); +#define mm_init_global_asid mm_init_global_asid +extern void mm_init_global_asid(struct mm_struct *mm); +extern void mm_free_global_asid(struct mm_struct *mm); + +/* + * Init a new mm. Used on mm copies, like at fork() + * and on mm's that are brand-new, like at execve(). 
+ */ +#define init_new_context init_new_context static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { @@ -185,6 +156,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); atomic64_set(&mm->context.tlb_gen, 0); + mm->context.next_trim_cpumask = jiffies + HZ; #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { @@ -194,12 +166,18 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.execute_only_pkey = -1; } #endif + + mm_init_global_asid(mm); + mm_reset_untag_mask(mm); init_new_context_ldt(mm); return 0; } + +#define destroy_context destroy_context static inline void destroy_context(struct mm_struct *mm) { destroy_context_ldt(mm); + mm_free_global_asid(mm); } extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, @@ -211,26 +189,42 @@ extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, #define activate_mm(prev, next) \ do { \ - paravirt_activate_mm((prev), (next)); \ - switch_mm((prev), (next), NULL); \ + paravirt_enter_mmap(next); \ + switch_mm_irqs_off((prev), (next), NULL); \ } while (0); #ifdef CONFIG_X86_32 #define deactivate_mm(tsk, mm) \ do { \ - lazy_load_gs(0); \ + loadsegment(gs, 0); \ } while (0) #else #define deactivate_mm(tsk, mm) \ do { \ + shstk_free(tsk); \ load_gs_index(0); \ loadsegment(fs, 0); \ } while (0) #endif +static inline void arch_dup_pkeys(struct mm_struct *oldmm, + struct mm_struct *mm) +{ +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + + /* Duplicate the oldmm pkey state in mm: */ + mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map; + mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; +#endif +} + static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { - paravirt_arch_dup_mmap(oldmm, mm); + arch_dup_pkeys(oldmm, mm); + paravirt_enter_mmap(mm); + dup_lam(oldmm, mm); return ldt_dup_context(oldmm, mm); } @@ -244,7 +238,7 @@ static inline void arch_exit_mmap(struct mm_struct *mm) static inline bool is_64bit_mm(struct mm_struct *mm) { return !IS_ENABLED(CONFIG_IA32_EMULATION) || - !(mm->context.ia32_compat == TIF_IA32); + !test_bit(MM_CONTEXT_UPROBE_IA32, &mm->context.flags); } #else static inline bool is_64bit_mm(struct mm_struct *mm) @@ -253,34 +247,14 @@ static inline bool is_64bit_mm(struct mm_struct *mm) } #endif -static inline void arch_bprm_mm_init(struct mm_struct *mm, - struct vm_area_struct *vma) +static inline bool is_notrack_mm(struct mm_struct *mm) { - mpx_mm_init(mm); + return test_bit(MM_CONTEXT_NOTRACK, &mm->context.flags); } -static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long start, unsigned long end) +static inline void set_notrack_mm(struct mm_struct *mm) { - /* - * mpx_notify_unmap() goes and reads a rarely-hot - * cacheline in the mm_struct. That can be expensive - * enough to be seen in profiles. - * - * The mpx_notify_unmap() call and its contents have been - * observed to affect munmap() performance on hardware - * where MPX is not present. - * - * The unlikely() optimizes for the fast case: no MPX - * in the CPU, or no MPX use in the process. 
Even if - * we get this wrong (in the unlikely event that MPX - * is widely enabled on some system) the overhead of - * MPX itself (reading bounds tables) is expected to - * overwhelm the overhead of getting this unlikely() - * consistently wrong. - */ - if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX))) - mpx_notify_unmap(mm, vma, start, end); + set_bit(MM_CONTEXT_NOTRACK, &mm->context.flags); } /* @@ -292,21 +266,6 @@ static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, * So do not enforce things if the VMA is not from the current * mm, or if we are in a kernel thread. */ -static inline bool vma_is_foreign(struct vm_area_struct *vma) -{ - if (!current->mm) - return true; - /* - * Should PKRU be enforced on the access to this VMA? If - * the VMA is from another process, then PKRU has no - * relevance and should not be enforced. - */ - if (current->mm != vma->vm_mm) - return true; - - return false; -} - static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { @@ -319,23 +278,11 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, return __pkru_allows_pkey(vma_pkey(vma), write); } -/* - * This can be used from process context to figure out what the value of - * CR3 is without needing to do a (slow) __read_cr3(). - * - * It's intended to be used for code like KVM that sneakily changes CR3 - * and needs to restore it. It needs to be used very carefully. - */ -static inline unsigned long __get_current_cr3_fast(void) -{ - unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd, - this_cpu_read(cpu_tlbstate.loaded_mm_asid)); +unsigned long __get_current_cr3_fast(void); - /* For now, be very restrictive about when this can be called. */ - VM_WARN_ON(in_nmi() || preemptible()); +#include <asm-generic/mmu_context.h> - VM_BUG_ON(cr3 != __read_cr3()); - return cr3; -} +extern struct mm_struct *use_temporary_mm(struct mm_struct *temp_mm); +extern void unuse_temporary_mm(struct mm_struct *prev_mm); #endif /* _ASM_X86_MMU_CONTEXT_H */ diff --git a/arch/x86/include/asm/mmx.h b/arch/x86/include/asm/mmx.h deleted file mode 100644 index f572d0f944bb..000000000000 --- a/arch/x86/include/asm/mmx.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_MMX_H -#define _ASM_X86_MMX_H - -/* - * MMX 3Dnow! 
helper operations - */ - -#include <linux/types.h> - -extern void *_mmx_memcpy(void *to, const void *from, size_t size); -extern void mmx_clear_page(void *page); -extern void mmx_copy_page(void *to, void *from); - -#endif /* _ASM_X86_MMX_H */ diff --git a/arch/x86/include/asm/mmzone.h b/arch/x86/include/asm/mmzone.h deleted file mode 100644 index c41b41edd691..000000000000 --- a/arch/x86/include/asm/mmzone.h +++ /dev/null @@ -1,6 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifdef CONFIG_X86_32 -# include <asm/mmzone_32.h> -#else -# include <asm/mmzone_64.h> -#endif diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h deleted file mode 100644 index 73d8dd14dda2..000000000000 --- a/arch/x86/include/asm/mmzone_32.h +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Written by Pat Gaughen (gone@us.ibm.com) Mar 2002 - * - */ - -#ifndef _ASM_X86_MMZONE_32_H -#define _ASM_X86_MMZONE_32_H - -#include <asm/smp.h> - -#ifdef CONFIG_NUMA -extern struct pglist_data *node_data[]; -#define NODE_DATA(nid) (node_data[nid]) -#endif /* CONFIG_NUMA */ - -#ifdef CONFIG_DISCONTIGMEM - -/* - * generic node memory support, the following assumptions apply: - * - * 1) memory comes in 64Mb contiguous chunks which are either present or not - * 2) we will not have more than 64Gb in total - * - * for now assume that 64Gb is max amount of RAM for whole system - * 64Gb / 4096bytes/page = 16777216 pages - */ -#define MAX_NR_PAGES 16777216 -#define MAX_SECTIONS 1024 -#define PAGES_PER_SECTION (MAX_NR_PAGES/MAX_SECTIONS) - -extern s8 physnode_map[]; - -static inline int pfn_to_nid(unsigned long pfn) -{ -#ifdef CONFIG_NUMA - return((int) physnode_map[(pfn) / PAGES_PER_SECTION]); -#else - return 0; -#endif -} - -static inline int pfn_valid(int pfn) -{ - int nid = pfn_to_nid(pfn); - - if (nid >= 0) - return (pfn < node_end_pfn(nid)); - return 0; -} - -#define early_pfn_valid(pfn) pfn_valid((pfn)) - -#endif /* CONFIG_DISCONTIGMEM */ - -#endif /* _ASM_X86_MMZONE_32_H */ diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h deleted file mode 100644 index 0c585046f744..000000000000 --- a/arch/x86/include/asm/mmzone_64.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* K8 NUMA support */ -/* Copyright 2002,2003 by Andi Kleen, SuSE Labs */ -/* 2.5 Version loosely based on the NUMAQ Code by Pat Gaughen. 
*/ -#ifndef _ASM_X86_MMZONE_64_H -#define _ASM_X86_MMZONE_64_H - -#ifdef CONFIG_NUMA - -#include <linux/mmdebug.h> -#include <asm/smp.h> - -extern struct pglist_data *node_data[]; - -#define NODE_DATA(nid) (node_data[nid]) - -#endif -#endif /* _ASM_X86_MMZONE_64_H */ diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 7948a17febb4..3c2de4ce3b10 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -5,70 +5,20 @@ #include <asm-generic/module.h> #include <asm/orc_types.h> +struct its_array { +#ifdef CONFIG_MITIGATION_ITS + void **pages; + int num; +#endif +}; + struct mod_arch_specific { #ifdef CONFIG_UNWINDER_ORC unsigned int num_orcs; int *orc_unwind_ip; struct orc_entry *orc_unwind; #endif + struct its_array its_pages; }; -#ifdef CONFIG_X86_64 -/* X86_64 does not define MODULE_PROC_FAMILY */ -#elif defined CONFIG_M486 -#define MODULE_PROC_FAMILY "486 " -#elif defined CONFIG_M586 -#define MODULE_PROC_FAMILY "586 " -#elif defined CONFIG_M586TSC -#define MODULE_PROC_FAMILY "586TSC " -#elif defined CONFIG_M586MMX -#define MODULE_PROC_FAMILY "586MMX " -#elif defined CONFIG_MCORE2 -#define MODULE_PROC_FAMILY "CORE2 " -#elif defined CONFIG_MATOM -#define MODULE_PROC_FAMILY "ATOM " -#elif defined CONFIG_M686 -#define MODULE_PROC_FAMILY "686 " -#elif defined CONFIG_MPENTIUMII -#define MODULE_PROC_FAMILY "PENTIUMII " -#elif defined CONFIG_MPENTIUMIII -#define MODULE_PROC_FAMILY "PENTIUMIII " -#elif defined CONFIG_MPENTIUMM -#define MODULE_PROC_FAMILY "PENTIUMM " -#elif defined CONFIG_MPENTIUM4 -#define MODULE_PROC_FAMILY "PENTIUM4 " -#elif defined CONFIG_MK6 -#define MODULE_PROC_FAMILY "K6 " -#elif defined CONFIG_MK7 -#define MODULE_PROC_FAMILY "K7 " -#elif defined CONFIG_MK8 -#define MODULE_PROC_FAMILY "K8 " -#elif defined CONFIG_MELAN -#define MODULE_PROC_FAMILY "ELAN " -#elif defined CONFIG_MCRUSOE -#define MODULE_PROC_FAMILY "CRUSOE " -#elif defined CONFIG_MEFFICEON -#define MODULE_PROC_FAMILY "EFFICEON " -#elif defined CONFIG_MWINCHIPC6 -#define MODULE_PROC_FAMILY "WINCHIPC6 " -#elif defined CONFIG_MWINCHIP3D -#define MODULE_PROC_FAMILY "WINCHIP3D " -#elif defined CONFIG_MCYRIXIII -#define MODULE_PROC_FAMILY "CYRIXIII " -#elif defined CONFIG_MVIAC3_2 -#define MODULE_PROC_FAMILY "VIAC3-2 " -#elif defined CONFIG_MVIAC7 -#define MODULE_PROC_FAMILY "VIAC7 " -#elif defined CONFIG_MGEODEGX1 -#define MODULE_PROC_FAMILY "GEODEGX1 " -#elif defined CONFIG_MGEODE_LX -#define MODULE_PROC_FAMILY "GEODE " -#else -#error unknown processor family -#endif - -#ifdef CONFIG_X86_32 -# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY -#endif - #endif /* _ASM_X86_MODULE_H */ diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 606cbaebd336..d593e52e6635 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_MPSPEC_H #define _ASM_X86_MPSPEC_H +#include <linux/types.h> #include <asm/mpspec_def.h> #include <asm/x86_init.h> @@ -15,16 +16,14 @@ extern int pic_mode; * Summit or generic (i.e. installer) kernels need lots of bus entries. * Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. 
*/ -#if CONFIG_BASE_SMALL == 0 -# define MAX_MP_BUSSES 260 -#else +#ifdef CONFIG_BASE_SMALL # define MAX_MP_BUSSES 32 +#else +# define MAX_MP_BUSSES 260 #endif #define MAX_IRQ_SOURCES 256 -extern unsigned int def_to_bigsmp; - #else /* CONFIG_X86_64: */ #define MAX_MP_BUSSES 256 @@ -39,9 +38,8 @@ extern int mp_bus_id_to_type[MAX_MP_BUSSES]; extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -extern unsigned int boot_cpu_physical_apicid; +extern u32 boot_cpu_physical_apicid; extern u8 boot_cpu_apic_version; -extern unsigned long mp_lapic_addr; #ifdef CONFIG_X86_LOCAL_APIC extern int smp_found_config; @@ -49,106 +47,31 @@ extern int smp_found_config; # define smp_found_config 0 #endif -static inline void get_smp_config(void) -{ - x86_init.mpparse.get_smp_config(0); -} - -static inline void early_get_smp_config(void) -{ - x86_init.mpparse.get_smp_config(1); -} - -static inline void find_smp_config(void) -{ - x86_init.mpparse.find_smp_config(); -} - #ifdef CONFIG_X86_MPPARSE extern void e820__memblock_alloc_reserved_mpc_new(void); extern int enable_update_mptable; -extern int default_mpc_apic_id(struct mpc_cpu *m); -extern void default_smp_read_mpc_oem(struct mpc_table *mpc); -# ifdef CONFIG_X86_IO_APIC -extern void default_mpc_oem_bus_info(struct mpc_bus *m, char *str); -# else -# define default_mpc_oem_bus_info NULL -# endif -extern void default_find_smp_config(void); -extern void default_get_smp_config(unsigned int early); +extern void mpparse_find_mptable(void); +extern void mpparse_parse_early_smp_config(void); +extern void mpparse_parse_smp_config(void); #else static inline void e820__memblock_alloc_reserved_mpc_new(void) { } -#define enable_update_mptable 0 -#define default_mpc_apic_id NULL -#define default_smp_read_mpc_oem NULL -#define default_mpc_oem_bus_info NULL -#define default_find_smp_config x86_init_noop -#define default_get_smp_config x86_init_uint_noop +#define enable_update_mptable 0 +#define mpparse_find_mptable x86_init_noop +#define mpparse_parse_early_smp_config x86_init_noop +#define mpparse_parse_smp_config x86_init_noop #endif -int generic_processor_info(int apicid, int version); - -#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC) - -struct physid_mask { - unsigned long mask[PHYSID_ARRAY_SIZE]; -}; +extern DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC); -typedef struct physid_mask physid_mask_t; - -#define physid_set(physid, map) set_bit(physid, (map).mask) -#define physid_clear(physid, map) clear_bit(physid, (map).mask) -#define physid_isset(physid, map) test_bit(physid, (map).mask) -#define physid_test_and_set(physid, map) \ - test_and_set_bit(physid, (map).mask) - -#define physids_and(dst, src1, src2) \ - bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC) - -#define physids_or(dst, src1, src2) \ - bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC) - -#define physids_clear(map) \ - bitmap_zero((map).mask, MAX_LOCAL_APIC) - -#define physids_complement(dst, src) \ - bitmap_complement((dst).mask, (src).mask, MAX_LOCAL_APIC) - -#define physids_empty(map) \ - bitmap_empty((map).mask, MAX_LOCAL_APIC) - -#define physids_equal(map1, map2) \ - bitmap_equal((map1).mask, (map2).mask, MAX_LOCAL_APIC) - -#define physids_weight(map) \ - bitmap_weight((map).mask, MAX_LOCAL_APIC) - -#define physids_shift_right(d, s, n) \ - bitmap_shift_right((d).mask, (s).mask, n, MAX_LOCAL_APIC) - -#define physids_shift_left(d, s, n) \ - bitmap_shift_left((d).mask, (s).mask, n, MAX_LOCAL_APIC) - -static inline unsigned long physids_coerce(physid_mask_t 
*map) -{ - return map->mask[0]; -} - -static inline void physids_promote(unsigned long physids, physid_mask_t *map) +static inline void reset_phys_cpu_present_map(u32 apicid) { - physids_clear(*map); - map->mask[0] = physids; + bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC); + set_bit(apicid, phys_cpu_present_map); } -static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) +static inline void copy_phys_cpu_present_map(unsigned long *dst) { - physids_clear(*map); - physid_set(physid, *map); + bitmap_copy(dst, phys_cpu_present_map, MAX_LOCAL_APIC); } -#define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} } -#define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} } - -extern physid_mask_t phys_cpu_present_map; - #endif /* _ASM_X86_MPSPEC_H */ diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h deleted file mode 100644 index d0b1434fb0b6..000000000000 --- a/arch/x86/include/asm/mpx.h +++ /dev/null @@ -1,115 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_MPX_H -#define _ASM_X86_MPX_H - -#include <linux/types.h> -#include <linux/mm_types.h> - -#include <asm/ptrace.h> -#include <asm/insn.h> - -/* - * NULL is theoretically a valid place to put the bounds - * directory, so point this at an invalid address. - */ -#define MPX_INVALID_BOUNDS_DIR ((void __user *)-1) -#define MPX_BNDCFG_ENABLE_FLAG 0x1 -#define MPX_BD_ENTRY_VALID_FLAG 0x1 - -/* - * The upper 28 bits [47:20] of the virtual address in 64-bit - * are used to index into bounds directory (BD). - * - * The directory is 2G (2^31) in size, and with 8-byte entries - * it has 2^28 entries. - */ -#define MPX_BD_SIZE_BYTES_64 (1UL<<31) -#define MPX_BD_ENTRY_BYTES_64 8 -#define MPX_BD_NR_ENTRIES_64 (MPX_BD_SIZE_BYTES_64/MPX_BD_ENTRY_BYTES_64) - -/* - * The 32-bit directory is 4MB (2^22) in size, and with 4-byte - * entries it has 2^20 entries. - */ -#define MPX_BD_SIZE_BYTES_32 (1UL<<22) -#define MPX_BD_ENTRY_BYTES_32 4 -#define MPX_BD_NR_ENTRIES_32 (MPX_BD_SIZE_BYTES_32/MPX_BD_ENTRY_BYTES_32) - -/* - * A 64-bit table is 4MB total in size, and an entry is - * 4 64-bit pointers in size. - */ -#define MPX_BT_SIZE_BYTES_64 (1UL<<22) -#define MPX_BT_ENTRY_BYTES_64 32 -#define MPX_BT_NR_ENTRIES_64 (MPX_BT_SIZE_BYTES_64/MPX_BT_ENTRY_BYTES_64) - -/* - * A 32-bit table is 16kB total in size, and an entry is - * 4 32-bit pointers in size. - */ -#define MPX_BT_SIZE_BYTES_32 (1UL<<14) -#define MPX_BT_ENTRY_BYTES_32 16 -#define MPX_BT_NR_ENTRIES_32 (MPX_BT_SIZE_BYTES_32/MPX_BT_ENTRY_BYTES_32) - -#define MPX_BNDSTA_TAIL 2 -#define MPX_BNDCFG_TAIL 12 -#define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1)) -#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) -#define MPX_BNDSTA_ERROR_CODE 0x3 - -struct mpx_fault_info { - void __user *addr; - void __user *lower; - void __user *upper; -}; - -#ifdef CONFIG_X86_INTEL_MPX -int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs); -int mpx_handle_bd_fault(void); -static inline int kernel_managing_mpx_tables(struct mm_struct *mm) -{ - return (mm->context.bd_addr != MPX_INVALID_BOUNDS_DIR); -} -static inline void mpx_mm_init(struct mm_struct *mm) -{ - /* - * NULL is theoretically a valid place to put the bounds - * directory, so point this at an invalid address. 
- */ - mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR; -} -void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long start, unsigned long end); - -unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len, - unsigned long flags); -#else -static inline int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs) -{ - return -EINVAL; -} -static inline int mpx_handle_bd_fault(void) -{ - return -EINVAL; -} -static inline int kernel_managing_mpx_tables(struct mm_struct *mm) -{ - return 0; -} -static inline void mpx_mm_init(struct mm_struct *mm) -{ -} -static inline void mpx_notify_unmap(struct mm_struct *mm, - struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ -} - -static inline unsigned long mpx_unmapped_area_check(unsigned long addr, - unsigned long len, unsigned long flags) -{ - return addr; -} -#endif /* CONFIG_X86_INTEL_MPX */ - -#endif /* _ASM_X86_MPX_H */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index cc60e617931c..eef4c3a5ba28 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -3,162 +3,84 @@ #define _ASM_X86_MSHYPER_H #include <linux/types.h> -#include <linux/atomic.h> #include <linux/nmi.h> -#include <asm/io.h> -#include <asm/hyperv-tlfs.h> +#include <linux/msi.h> +#include <linux/io.h> +#include <linux/static_call.h> #include <asm/nospec-branch.h> +#include <asm/paravirt.h> +#include <asm/msr.h> +#include <hyperv/hvhdk.h> +#include <asm/fpu/types.h> -#define VP_INVAL U32_MAX +/* + * Hyper-V always provides a single IO-APIC at this MMIO address. + * Ideally, the value should be looked up in ACPI tables, but it + * is needed for mapping the IO-APIC early in boot on Confidential + * VMs, before ACPI functions can be used. + */ +#define HV_IOAPIC_BASE_ADDRESS 0xfec00000 -struct ms_hyperv_info { - u32 features; - u32 misc_features; - u32 hints; - u32 nested_features; - u32 max_vp_index; - u32 max_lp_index; -}; +#define HV_VTL_NORMAL 0x0 +#define HV_VTL_SECURE 0x1 +#define HV_VTL_MGMT 0x2 -extern struct ms_hyperv_info ms_hyperv; +union hv_ghcb; +DECLARE_STATIC_KEY_FALSE(isolation_type_snp); +DECLARE_STATIC_KEY_FALSE(isolation_type_tdx); typedef int (*hyperv_fill_flush_list_func)( struct hv_guest_mapping_flush_list *flush, void *data); -/* - * Generate the guest ID. - */ - -static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version, - __u64 d_info2) -{ - __u64 guest_id = 0; - - guest_id = (((__u64)HV_LINUX_VENDOR_ID) << 48); - guest_id |= (d_info1 << 48); - guest_id |= (kernel_version << 16); - guest_id |= d_info2; - - return guest_id; -} - +void hyperv_vector_handler(struct pt_regs *regs); -/* Free the message slot and signal end-of-message if required */ -static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) +static inline unsigned char hv_get_nmi_reason(void) { - /* - * On crash we're reading some other CPU's message page and we need - * to be careful: this other CPU may already had cleared the header - * and the host may already had delivered some other message there. - * In case we blindly write msg->header.message_type we're going - * to lose it. We can still lose a message of the same type but - * we count on the fact that there can only be one - * CHANNELMSG_UNLOAD_RESPONSE and we don't care about other messages - * on crash. 
- */ - if (cmpxchg(&msg->header.message_type, old_msg_type, - HVMSG_NONE) != old_msg_type) - return; - - /* - * Make sure the write to MessageType (ie set to - * HVMSG_NONE) happens before we read the - * MessagePending and EOMing. Otherwise, the EOMing - * will not deliver any more messages since there is - * no empty slot - */ - mb(); - - if (msg->header.message_flags.msg_pending) { - /* - * This will cause message queue rescan to - * possibly deliver another msg from the - * hypervisor - */ - wrmsrl(HV_X64_MSR_EOM, 0); - } + return 0; } -#define hv_init_timer(timer, tick) \ - wrmsrl(HV_X64_MSR_STIMER0_COUNT + (2*timer), tick) -#define hv_init_timer_config(timer, val) \ - wrmsrl(HV_X64_MSR_STIMER0_CONFIG + (2*timer), val) - -#define hv_get_simp(val) rdmsrl(HV_X64_MSR_SIMP, val) -#define hv_set_simp(val) wrmsrl(HV_X64_MSR_SIMP, val) +extern u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2); +extern u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2); +extern u64 hv_std_hypercall(u64 control, u64 param1, u64 param2); -#define hv_get_siefp(val) rdmsrl(HV_X64_MSR_SIEFP, val) -#define hv_set_siefp(val) wrmsrl(HV_X64_MSR_SIEFP, val) - -#define hv_get_synic_state(val) rdmsrl(HV_X64_MSR_SCONTROL, val) -#define hv_set_synic_state(val) wrmsrl(HV_X64_MSR_SCONTROL, val) - -#define hv_get_vp_index(index) rdmsrl(HV_X64_MSR_VP_INDEX, index) +#if IS_ENABLED(CONFIG_HYPERV) +extern void *hv_hypercall_pg; -#define hv_get_synint_state(int_num, val) \ - rdmsrl(HV_X64_MSR_SINT0 + int_num, val) -#define hv_set_synint_state(int_num, val) \ - wrmsrl(HV_X64_MSR_SINT0 + int_num, val) +extern union hv_ghcb * __percpu *hv_ghcb_pg; -#define hv_get_crash_ctl(val) \ - rdmsrl(HV_X64_MSR_CRASH_CTL, val) +bool hv_isolation_type_snp(void); +bool hv_isolation_type_tdx(void); -void hyperv_callback_vector(void); -void hyperv_reenlightenment_vector(void); -#ifdef CONFIG_TRACING -#define trace_hyperv_callback_vector hyperv_callback_vector +#ifdef CONFIG_X86_64 +DECLARE_STATIC_CALL(hv_hypercall, hv_std_hypercall); #endif -void hyperv_vector_handler(struct pt_regs *regs); -void hv_setup_vmbus_irq(void (*handler)(void)); -void hv_remove_vmbus_irq(void); - -void hv_setup_kexec_handler(void (*handler)(void)); -void hv_remove_kexec_handler(void); -void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)); -void hv_remove_crash_handler(void); /* - * Routines for stimer0 Direct Mode handling. - * On x86/x64, there are no percpu actions to take. + * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA + * to start AP in enlightened SEV guest. */ -void hv_stimer0_vector_handler(struct pt_regs *regs); -void hv_stimer0_callback_vector(void); -int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)); -void hv_remove_stimer0_irq(int irq); - -static inline void hv_enable_stimer0_percpu_irq(int irq) {} -static inline void hv_disable_stimer0_percpu_irq(int irq) {} - - -#if IS_ENABLED(CONFIG_HYPERV) -extern struct clocksource *hyperv_cs; -extern void *hv_hypercall_pg; -extern void __percpu **hyperv_pcpu_input_arg; +#define HV_AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL +#define HV_AP_SEGMENT_LIMIT 0xffffffff +/* + * If the hypercall involves no input or output parameters, the hypervisor + * ignores the corresponding GPA pointer. + */ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) { u64 input_address = input ? virt_to_phys(input) : 0; u64 output_address = output ? 
virt_to_phys(output) : 0; - u64 hv_status; #ifdef CONFIG_X86_64 - if (!hv_hypercall_pg) - return U64_MAX; - - __asm__ __volatile__("mov %4, %%r8\n" - CALL_NOSPEC - : "=a" (hv_status), ASM_CALL_CONSTRAINT, - "+c" (control), "+d" (input_address) - : "r" (output_address), - THUNK_TARGET(hv_hypercall_pg) - : "cc", "memory", "r8", "r9", "r10", "r11"); + return static_call_mod(hv_hypercall)(control, input_address, output_address); #else u32 input_address_hi = upper_32_bits(input_address); u32 input_address_lo = lower_32_bits(input_address); u32 output_address_hi = upper_32_bits(output_address); u32 output_address_lo = lower_32_bits(output_address); + u64 hv_status; if (!hv_hypercall_pg) return U64_MAX; @@ -171,115 +93,69 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) "D"(output_address_hi), "S"(output_address_lo), THUNK_TARGET(hv_hypercall_pg) : "cc", "memory"); -#endif /* !x86_64 */ return hv_status; +#endif /* !x86_64 */ } /* Fast hypercall with 8 bytes of input and no output */ -static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) +static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1) { - u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT; - #ifdef CONFIG_X86_64 - { - __asm__ __volatile__(CALL_NOSPEC - : "=a" (hv_status), ASM_CALL_CONSTRAINT, - "+c" (control), "+d" (input1) - : THUNK_TARGET(hv_hypercall_pg) - : "cc", "r8", "r9", "r10", "r11"); - } + return static_call_mod(hv_hypercall)(control, input1, 0); #else - { - u32 input1_hi = upper_32_bits(input1); - u32 input1_lo = lower_32_bits(input1); - - __asm__ __volatile__ (CALL_NOSPEC - : "=A"(hv_status), - "+c"(input1_lo), - ASM_CALL_CONSTRAINT - : "A" (control), - "b" (input1_hi), - THUNK_TARGET(hv_hypercall_pg) - : "cc", "edi", "esi"); - } + u32 input1_hi = upper_32_bits(input1); + u32 input1_lo = lower_32_bits(input1); + u64 hv_status; + + __asm__ __volatile__ (CALL_NOSPEC + : "=A"(hv_status), + "+c"(input1_lo), + ASM_CALL_CONSTRAINT + : "A" (control), + "b" (input1_hi), + THUNK_TARGET(hv_hypercall_pg) + : "cc", "edi", "esi"); + return hv_status; #endif - return hv_status; } -/* Fast hypercall with 16 bytes of input */ -static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) +static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) { - u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT; + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT; + return _hv_do_fast_hypercall8(control, input1); +} + +/* Fast hypercall with 16 bytes of input */ +static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2) +{ #ifdef CONFIG_X86_64 - { - __asm__ __volatile__("mov %4, %%r8\n" - CALL_NOSPEC - : "=a" (hv_status), ASM_CALL_CONSTRAINT, - "+c" (control), "+d" (input1) - : "r" (input2), - THUNK_TARGET(hv_hypercall_pg) - : "cc", "r8", "r9", "r10", "r11"); - } + return static_call_mod(hv_hypercall)(control, input1, input2); #else - { - u32 input1_hi = upper_32_bits(input1); - u32 input1_lo = lower_32_bits(input1); - u32 input2_hi = upper_32_bits(input2); - u32 input2_lo = lower_32_bits(input2); - - __asm__ __volatile__ (CALL_NOSPEC - : "=A"(hv_status), - "+c"(input1_lo), ASM_CALL_CONSTRAINT - : "A" (control), "b" (input1_hi), - "D"(input2_hi), "S"(input2_lo), - THUNK_TARGET(hv_hypercall_pg) - : "cc"); - } -#endif + u32 input1_hi = upper_32_bits(input1); + u32 input1_lo = lower_32_bits(input1); + u32 input2_hi = upper_32_bits(input2); + u32 input2_lo = lower_32_bits(input2); + u64 hv_status; + + __asm__ __volatile__ (CALL_NOSPEC + : "=A"(hv_status), + 
"+c"(input1_lo), ASM_CALL_CONSTRAINT + : "A" (control), "b" (input1_hi), + "D"(input2_hi), "S"(input2_lo), + THUNK_TARGET(hv_hypercall_pg) + : "cc"); return hv_status; +#endif } -/* - * Rep hypercalls. Callers of this functions are supposed to ensure that - * rep_count and varhead_size comply with Hyper-V hypercall definition. - */ -static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, - void *input, void *output) +static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) { - u64 control = code; - u64 status; - u16 rep_comp; - - control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET; - control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET; - - do { - status = hv_do_hypercall(control, input, output); - if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) - return status; - - /* Bits 32-43 of status have 'Reps completed' data. */ - rep_comp = (status & HV_HYPERCALL_REP_COMP_MASK) >> - HV_HYPERCALL_REP_COMP_OFFSET; - - control &= ~HV_HYPERCALL_REP_START_MASK; - control |= (u64)rep_comp << HV_HYPERCALL_REP_START_OFFSET; - - touch_nmi_watchdog(); - } while (rep_comp < rep_count); + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT; - return status; + return _hv_do_fast_hypercall16(control, input1, input2); } -/* - * Hypervisor's notion of virtual processor ID is different from - * Linux' notion of CPU ID. This information can only be retrieved - * in the context of the calling CPU. Setup a map for easy access - * to this information. - */ -extern u32 *hv_vp_index; -extern u32 hv_max_vp_index; extern struct hv_vp_assist_page **hv_vp_assist_page; static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) @@ -290,65 +166,8 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) return hv_vp_assist_page[cpu]; } -/** - * hv_cpu_number_to_vp_number() - Map CPU to VP. - * @cpu_number: CPU number in Linux terms - * - * This function returns the mapping between the Linux processor - * number and the hypervisor's virtual processor number, useful - * in making hypercalls and such that talk about specific - * processors. - * - * Return: Virtual processor number in Hyper-V terms - */ -static inline int hv_cpu_number_to_vp_number(int cpu_number) -{ - return hv_vp_index[cpu_number]; -} - -static inline int cpumask_to_vpset(struct hv_vpset *vpset, - const struct cpumask *cpus) -{ - int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1; - - /* valid_bank_mask can represent up to 64 banks */ - if (hv_max_vp_index / 64 >= 64) - return 0; - - /* - * Clear all banks up to the maximum possible bank as hv_tlb_flush_ex - * structs are not cleared between calls, we risk flushing unneeded - * vCPUs otherwise. - */ - for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++) - vpset->bank_contents[vcpu_bank] = 0; - - /* - * Some banks may end up being empty but this is acceptable. 
- */ - for_each_cpu(cpu, cpus) { - vcpu = hv_cpu_number_to_vp_number(cpu); - if (vcpu == VP_INVAL) - return -1; - vcpu_bank = vcpu / 64; - vcpu_offset = vcpu % 64; - __set_bit(vcpu_offset, (unsigned long *) - &vpset->bank_contents[vcpu_bank]); - if (vcpu_bank >= nr_bank) - nr_bank = vcpu_bank + 1; - } - vpset->valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0); - return nr_bank; -} - void __init hyperv_init(void); void hyperv_setup_mmu_ops(void); -void hyperv_report_panic(struct pt_regs *regs, long err); -void hyperv_report_panic_msg(phys_addr_t pa, size_t size); -bool hv_is_hyperv_initialized(void); -void hyperv_cleanup(void); - -void hyperv_reenlightenment_intr(struct pt_regs *regs); void set_hv_tscchange_cb(void (*cb)(void)); void clear_hv_tscchange_cb(void); void hyperv_stop_tsc_emulation(void); @@ -358,6 +177,8 @@ int hyperv_flush_guest_mapping_range(u64 as, int hyperv_fill_flush_guest_mapping_list( struct hv_guest_mapping_flush_list *flush, u64 start_gfn, u64 end_gfn); +void hv_sleep_notifiers_register(void); +void hv_machine_power_off(void); #ifdef CONFIG_X86_64 void hv_apic_init(void); @@ -367,10 +188,69 @@ bool hv_vcpu_is_preempted(int vcpu); static inline void hv_apic_init(void) {} #endif +struct irq_domain *hv_create_pci_msi_domain(void); + +int hv_map_msi_interrupt(struct irq_data *data, + struct hv_interrupt_entry *out_entry); +int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector, + struct hv_interrupt_entry *entry); +int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); + +#ifdef CONFIG_AMD_MEM_ENCRYPT +bool hv_ghcb_negotiate_protocol(void); +void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason); +int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu); +#else +static inline bool hv_ghcb_negotiate_protocol(void) { return false; } +static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} +static inline int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, + unsigned int cpu) { return 0; } +#endif + +#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) +void hv_vtom_init(void); +void hv_ivm_msr_write(u64 msr, u64 value); +void hv_ivm_msr_read(u64 msr, u64 *value); +#else +static inline void hv_vtom_init(void) {} +static inline void hv_ivm_msr_write(u64 msr, u64 value) {} +static inline void hv_ivm_msr_read(u64 msr, u64 *value) {} +#endif + +static inline bool hv_is_synic_msr(unsigned int reg) +{ + return (reg >= HV_X64_MSR_SCONTROL) && + (reg <= HV_X64_MSR_SINT15); +} + +static inline bool hv_is_sint_msr(unsigned int reg) +{ + return (reg >= HV_X64_MSR_SINT0) && + (reg <= HV_X64_MSR_SINT15); +} + +u64 hv_get_msr(unsigned int reg); +void hv_set_msr(unsigned int reg, u64 value); +u64 hv_get_non_nested_msr(unsigned int reg); +void hv_set_non_nested_msr(unsigned int reg, u64 value); + +static __always_inline u64 hv_raw_get_msr(unsigned int reg) +{ + return native_rdmsrq(reg); +} +int hv_apicid_to_vp_index(u32 apic_id); + +#if IS_ENABLED(CONFIG_MSHV_ROOT) && IS_ENABLED(CONFIG_CRASH_DUMP) +void hv_root_crash_init(void); +void hv_crash_asm32(void); +void hv_crash_asm64(void); +void hv_crash_asm_end(void); +#else /* CONFIG_MSHV_ROOT && CONFIG_CRASH_DUMP */ +static inline void hv_root_crash_init(void) {} +#endif /* CONFIG_MSHV_ROOT && CONFIG_CRASH_DUMP */ + #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} -static inline bool hv_is_hyperv_initialized(void) { return false; } -static inline void hyperv_cleanup(void) {} static inline void 
hyperv_setup_mmu_ops(void) {} static inline void set_hv_tscchange_cb(void (*cb)(void)) {} static inline void clear_hv_tscchange_cb(void) {} @@ -385,75 +265,55 @@ static inline int hyperv_flush_guest_mapping_range(u64 as, { return -1; } +static inline void hv_set_msr(unsigned int reg, u64 value) { } +static inline u64 hv_get_msr(unsigned int reg) { return 0; } +static inline void hv_set_non_nested_msr(unsigned int reg, u64 value) { } +static inline u64 hv_get_non_nested_msr(unsigned int reg) { return 0; } +static inline int hv_apicid_to_vp_index(u32 apic_id) { return -EINVAL; } #endif /* CONFIG_HYPERV */ -#ifdef CONFIG_HYPERV_TSCPAGE -struct ms_hyperv_tsc_page *hv_get_tsc_page(void); -static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, - u64 *cur_tsc) -{ - u64 scale, offset; - u32 sequence; - - /* - * The protocol for reading Hyper-V TSC page is specified in Hypervisor - * Top-Level Functional Specification ver. 3.0 and above. To get the - * reference time we must do the following: - * - READ ReferenceTscSequence - * A special '0' value indicates the time source is unreliable and we - * need to use something else. The currently published specification - * versions (up to 4.0b) contain a mistake and wrongly claim '-1' - * instead of '0' as the special value, see commit c35b82ef0294. - * - ReferenceTime = - * ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset - * - READ ReferenceTscSequence again. In case its value has changed - * since our first reading we need to discard ReferenceTime and repeat - * the whole sequence as the hypervisor was updating the page in - * between. - */ - do { - sequence = READ_ONCE(tsc_pg->tsc_sequence); - if (!sequence) - return U64_MAX; - /* - * Make sure we read sequence before we read other values from - * TSC page. - */ - smp_rmb(); - - scale = READ_ONCE(tsc_pg->tsc_scale); - offset = READ_ONCE(tsc_pg->tsc_offset); - *cur_tsc = rdtsc_ordered(); - - /* - * Make sure we read sequence after we read all other values - * from TSC page. 
- */ - smp_rmb(); - - } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence); - - return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset; -} - -static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) -{ - u64 cur_tsc; - - return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc); -} +struct mshv_vtl_cpu_context { + union { + struct { + u64 rax; + u64 rcx; + u64 rdx; + u64 rbx; + u64 cr2; + u64 rbp; + u64 rsi; + u64 rdi; + u64 r8; + u64 r9; + u64 r10; + u64 r11; + u64 r12; + u64 r13; + u64 r14; + u64 r15; + }; + u64 gp_regs[16]; + }; + + struct fxregs_state fx_state; +}; +#ifdef CONFIG_HYPERV_VTL_MODE +void __init hv_vtl_init_platform(void); +int __init hv_vtl_early_init(void); +void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0); +void mshv_vtl_return_call_init(u64 vtl_return_offset); +void mshv_vtl_return_hypercall(void); +void __mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0); #else -static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void) -{ - return NULL; -} - -static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, - u64 *cur_tsc) -{ - BUG(); - return U64_MAX; -} +static inline void __init hv_vtl_init_platform(void) {} +static inline int __init hv_vtl_early_init(void) { return 0; } +static inline void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0) {} +static inline void mshv_vtl_return_call_init(u64 vtl_return_offset) {} +static inline void mshv_vtl_return_hypercall(void) {} +static inline void __mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0) {} #endif + +#include <asm-generic/mshyperv.h> + #endif diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h index 25ddd0916bb2..935c6d470341 100644 --- a/arch/x86/include/asm/msi.h +++ b/arch/x86/include/asm/msi.h @@ -9,6 +9,63 @@ typedef struct irq_alloc_info msi_alloc_info_t; int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg); -void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc); +/* Structs and defines for the X86 specific MSI message format */ + +typedef struct x86_msi_data { + union { + struct { + u32 vector : 8, + delivery_mode : 3, + dest_mode_logical : 1, + reserved : 2, + active_low : 1, + is_level : 1; + }; + u32 dmar_subhandle; + }; +} __attribute__ ((packed)) arch_msi_msg_data_t; +#define arch_msi_msg_data x86_msi_data + +typedef struct x86_msi_addr_lo { + union { + struct { + u32 reserved_0 : 2, + dest_mode_logical : 1, + redirect_hint : 1, + reserved_1 : 1, + virt_destid_8_14 : 7, + destid_0_7 : 8, + base_address : 12; + }; + struct { + u32 dmar_reserved_0 : 2, + dmar_index_15 : 1, + dmar_subhandle_valid : 1, + dmar_format : 1, + dmar_index_0_14 : 15, + dmar_base_address : 12; + }; + }; +} __attribute__ ((packed)) arch_msi_msg_addr_lo_t; +#define arch_msi_msg_addr_lo x86_msi_addr_lo + +#define X86_MSI_BASE_ADDRESS_LOW (0xfee00000 >> 20) + +typedef struct x86_msi_addr_hi { + u32 reserved : 8, + destid_8_31 : 24; +} __attribute__ ((packed)) arch_msi_msg_addr_hi_t; +#define arch_msi_msg_addr_hi x86_msi_addr_hi + +#define X86_MSI_BASE_ADDRESS_HIGH (0) + +struct msi_msg; +u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid); + +#define X86_VECTOR_MSI_FLAGS_SUPPORTED \ + (MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX | MSI_FLAG_PCI_MSIX_ALLOC_DYN) + +#define X86_VECTOR_MSI_FLAGS_REQUIRED \ + (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS) #endif /* _ASM_X86_MSI_H */ diff --git a/arch/x86/include/asm/msidef.h b/arch/x86/include/asm/msidef.h deleted file mode 100644 index 
ee2f8ccc32d0..000000000000 --- a/arch/x86/include/asm/msidef.h +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_MSIDEF_H -#define _ASM_X86_MSIDEF_H - -/* - * Constants for Intel APIC based MSI messages. - */ - -/* - * Shifts for MSI data - */ - -#define MSI_DATA_VECTOR_SHIFT 0 -#define MSI_DATA_VECTOR_MASK 0x000000ff -#define MSI_DATA_VECTOR(v) (((v) << MSI_DATA_VECTOR_SHIFT) & \ - MSI_DATA_VECTOR_MASK) - -#define MSI_DATA_DELIVERY_MODE_SHIFT 8 -#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_MODE_SHIFT) -#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_MODE_SHIFT) - -#define MSI_DATA_LEVEL_SHIFT 14 -#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT) -#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT) - -#define MSI_DATA_TRIGGER_SHIFT 15 -#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT) -#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT) - -/* - * Shift/mask fields for msi address - */ - -#define MSI_ADDR_BASE_HI 0 -#define MSI_ADDR_BASE_LO 0xfee00000 - -#define MSI_ADDR_DEST_MODE_SHIFT 2 -#define MSI_ADDR_DEST_MODE_PHYSICAL (0 << MSI_ADDR_DEST_MODE_SHIFT) -#define MSI_ADDR_DEST_MODE_LOGICAL (1 << MSI_ADDR_DEST_MODE_SHIFT) - -#define MSI_ADDR_REDIRECTION_SHIFT 3 -#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT) - /* dedicated cpu */ -#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT) - /* lowest priority */ - -#define MSI_ADDR_DEST_ID_SHIFT 12 -#define MSI_ADDR_DEST_ID_MASK 0x00ffff0 -#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ - MSI_ADDR_DEST_ID_MASK) -#define MSI_ADDR_EXT_DEST_ID(dest) ((dest) & 0xffffff00) - -#define MSI_ADDR_IR_EXT_INT (1 << 4) -#define MSI_ADDR_IR_SHV (1 << 3) -#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13) -#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5) -#endif /* _ASM_X86_MSIDEF_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 8e40c2446fd1..3d0a0950d20a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -2,12 +2,9 @@ #ifndef _ASM_X86_MSR_INDEX_H #define _ASM_X86_MSR_INDEX_H -/* - * CPU model specific register (MSR) numbers. - * - * Do not add new entries to this file unless the definitions are shared - * between multiple compilation units. - */ +#include <linux/bits.h> + +/* CPU model specific register (MSR) numbers. */ /* x86-64 specific MSRs */ #define MSR_EFER 0xc0000080 /* extended feature register */ @@ -28,6 +25,8 @@ #define _EFER_SVME 12 /* Enable virtualization */ #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ +#define _EFER_TCE 15 /* Enable Translation Cache Extensions */ +#define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ #define EFER_SCE (1<<_EFER_SCE) #define EFER_LME (1<<_EFER_LME) @@ -36,18 +35,59 @@ #define EFER_SVME (1<<_EFER_SVME) #define EFER_LMSLE (1<<_EFER_LMSLE) #define EFER_FFXSR (1<<_EFER_FFXSR) +#define EFER_TCE (1<<_EFER_TCE) +#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) + +/* + * Architectural memory types that are common to MTRRs, PAT, VMX MSRs, etc. + * Most MSRs support/allow only a subset of memory types, but the values + * themselves are common across all relevant MSRs. + */ +#define X86_MEMTYPE_UC 0ull /* Uncacheable, a.k.a. 
Strong Uncacheable */ +#define X86_MEMTYPE_WC 1ull /* Write Combining */ +/* RESERVED 2 */ +/* RESERVED 3 */ +#define X86_MEMTYPE_WT 4ull /* Write Through */ +#define X86_MEMTYPE_WP 5ull /* Write Protected */ +#define X86_MEMTYPE_WB 6ull /* Write Back */ +#define X86_MEMTYPE_UC_MINUS 7ull /* Weak Uncacheabled (PAT only) */ + +/* FRED MSRs */ +#define MSR_IA32_FRED_RSP0 0x1cc /* Level 0 stack pointer */ +#define MSR_IA32_FRED_RSP1 0x1cd /* Level 1 stack pointer */ +#define MSR_IA32_FRED_RSP2 0x1ce /* Level 2 stack pointer */ +#define MSR_IA32_FRED_RSP3 0x1cf /* Level 3 stack pointer */ +#define MSR_IA32_FRED_STKLVLS 0x1d0 /* Exception stack levels */ +#define MSR_IA32_FRED_SSP0 MSR_IA32_PL0_SSP /* Level 0 shadow stack pointer */ +#define MSR_IA32_FRED_SSP1 0x1d1 /* Level 1 shadow stack pointer */ +#define MSR_IA32_FRED_SSP2 0x1d2 /* Level 2 shadow stack pointer */ +#define MSR_IA32_FRED_SSP3 0x1d3 /* Level 3 shadow stack pointer */ +#define MSR_IA32_FRED_CONFIG 0x1d4 /* Entrypoint and interrupt stack level */ /* Intel MSRs. Some also available on other CPUs */ +#define MSR_TEST_CTRL 0x00000033 +#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT 29 +#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT BIT(MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT) #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ -#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ +#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ -#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ +#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ -#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ +#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) +#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */ +#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT) + +/* A mask for bits which the kernel toggles when controlling mitigations */ +#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \ + | SPEC_CTRL_RRSBA_DIS_S \ + | SPEC_CTRL_BHI_DIS_S) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ -#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ +#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ +#define PRED_CMD_SBPB BIT(7) /* Selective Branch Prediction Barrier */ #define MSR_PPIN_CTL 0x0000004e #define MSR_PPIN 0x0000004f @@ -59,6 +99,22 @@ #define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31 #define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT) +#define MSR_IA32_UMWAIT_CONTROL 0xe1 +#define MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0) +#define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1) +/* + * The time field is bit[31:2], but representing a 32bit value with + * bit[1:0] zero. 
+ */ +#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) + +/* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */ +#define MSR_IA32_CORE_CAPS 0x000000cf +#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT 2 +#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS BIT(MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT) +#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT 5 +#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT) + #define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C1_AUTO_DEMOTE (1UL << 26) @@ -69,24 +125,125 @@ #define MSR_MTRRcap 0x000000fe #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a -#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ -#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ -#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH (1 << 3) /* Skip L1D flush on vmentry */ -#define ARCH_CAP_SSB_NO (1 << 4) /* - * Not susceptible to Speculative Store Bypass - * attack, so no Speculative Store Bypass - * control required. - */ +#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ +#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */ +#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ +#define ARCH_CAP_SSB_NO BIT(4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Speculative Store Bypass + * control required. + */ +#define ARCH_CAP_MDS_NO BIT(5) /* + * Not susceptible to + * Microarchitectural Data + * Sampling (MDS) vulnerabilities. + */ +#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /* + * The processor is not susceptible to a + * machine check error due to modifying the + * code page size along with either the + * physical address or cache type + * without TLB invalidation. + */ +#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */ +#define ARCH_CAP_TAA_NO BIT(8) /* + * Not susceptible to + * TSX Async Abort (TAA) vulnerabilities. + */ +#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /* + * Not susceptible to SBDR and SSDP + * variants of Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FBSDP_NO BIT(14) /* + * Not susceptible to FBSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_PSDP_NO BIT(15) /* + * Not susceptible to PSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_MCU_ENUM BIT(16) /* + * Indicates the presence of microcode update + * feature enumeration and status information. + */ +#define ARCH_CAP_FB_CLEAR BIT(17) /* + * VERW clears CPU fill buffer + * even on MDS_NO CPUs. + */ +#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /* + * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS] + * bit available to control VERW + * behavior. + */ +#define ARCH_CAP_RRSBA BIT(19) /* + * Indicates RET may use predictors + * other than the RSB. With eIBRS + * enabled predictions in kernel mode + * are restricted to targets in + * kernel. + */ +#define ARCH_CAP_BHI_NO BIT(20) /* + * CPU is not affected by Branch + * History Injection. + */ +#define ARCH_CAP_XAPIC_DISABLE BIT(21) /* + * IA32_XAPIC_DISABLE_STATUS MSR + * supported + */ +#define ARCH_CAP_PBRSB_NO BIT(24) /* + * Not susceptible to Post-Barrier + * Return Stack Buffer Predictions. + */ +#define ARCH_CAP_GDS_CTRL BIT(25) /* + * CPU is vulnerable to Gather + * Data Sampling (GDS) and + * has controls for mitigation. + */ +#define ARCH_CAP_GDS_NO BIT(26) /* + * CPU is not vulnerable to Gather + * Data Sampling (GDS). 
+ */ +#define ARCH_CAP_RFDS_NO BIT(27) /* + * Not susceptible to Register + * File Data Sampling. + */ +#define ARCH_CAP_RFDS_CLEAR BIT(28) /* + * VERW clears CPU Register + * File. + */ +#define ARCH_CAP_ITS_NO BIT_ULL(62) /* + * Not susceptible to + * Indirect Target Selection. + * This bit is not set by + * HW, but is synthesized by + * VMMs for guests to know + * their affected status. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b -#define L1D_FLUSH (1 << 0) /* - * Writeback and invalidate the - * L1 data cache. - */ +#define L1D_FLUSH BIT(0) /* + * Writeback and invalidate the + * L1 data cache. + */ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e +#define MSR_IA32_TSX_CTRL 0x00000122 +#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ +#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ + +#define MSR_IA32_MCU_OPT_CTRL 0x00000123 +#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */ +#define RTM_ALLOW BIT(1) /* TSX development mode */ +#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ +#define GDS_MITG_DIS BIT(4) /* Disable GDS mitigation */ +#define GDS_MITG_LOCKED BIT(5) /* GDS mitigation locked */ + #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 #define MSR_IA32_SYSENTER_EIP 0x00000176 @@ -94,6 +251,7 @@ #define MSR_IA32_MCG_CAP 0x00000179 #define MSR_IA32_MCG_STATUS 0x0000017a #define MSR_IA32_MCG_CTL 0x0000017b +#define MSR_ERROR_CONTROL 0x0000017f #define MSR_IA32_MCG_EXT_CTL 0x000004d0 #define MSR_OFFCORE_RSP_0 0x000001a6 @@ -102,8 +260,25 @@ #define MSR_TURBO_RATIO_LIMIT1 0x000001ae #define MSR_TURBO_RATIO_LIMIT2 0x000001af +#define MSR_SNOOP_RSP_0 0x00001328 +#define MSR_SNOOP_RSP_1 0x00001329 + #define MSR_LBR_SELECT 0x000001c8 #define MSR_LBR_TOS 0x000001c9 + +#define MSR_IA32_POWER_CTL 0x000001fc +#define MSR_IA32_POWER_CTL_BIT_EE 19 + +/* Abbreviated from Intel SDM name IA32_INTEGRITY_CAPABILITIES */ +#define MSR_INTEGRITY_CAPS 0x000002d9 +#define MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT 2 +#define MSR_INTEGRITY_CAPS_ARRAY_BIST BIT(MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT) +#define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4 +#define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT) +#define MSR_INTEGRITY_CAPS_SBAF_BIT 8 +#define MSR_INTEGRITY_CAPS_SBAF BIT(MSR_INTEGRITY_CAPS_SBAF_BIT) +#define MSR_INTEGRITY_CAPS_SAF_GEN_MASK GENMASK_ULL(10, 9) + #define MSR_LBR_NHM_FROM 0x00000680 #define MSR_LBR_NHM_TO 0x000006c0 #define MSR_LBR_CORE_FROM 0x00000040 @@ -113,13 +288,69 @@ #define LBR_INFO_MISPRED BIT_ULL(63) #define LBR_INFO_IN_TX BIT_ULL(62) #define LBR_INFO_ABORT BIT_ULL(61) +#define LBR_INFO_CYC_CNT_VALID BIT_ULL(60) #define LBR_INFO_CYCLES 0xffff +#define LBR_INFO_BR_TYPE_OFFSET 56 +#define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET) +#define LBR_INFO_BR_CNTR_OFFSET 32 +#define LBR_INFO_BR_CNTR_NUM 4 +#define LBR_INFO_BR_CNTR_BITS 2 +#define LBR_INFO_BR_CNTR_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_BITS - 1, 0) +#define LBR_INFO_BR_CNTR_FULL_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS - 1, 0) + +#define MSR_ARCH_LBR_CTL 0x000014ce +#define ARCH_LBR_CTL_LBREN BIT(0) +#define ARCH_LBR_CTL_CPL_OFFSET 1 +#define ARCH_LBR_CTL_CPL (0x3ull << ARCH_LBR_CTL_CPL_OFFSET) +#define ARCH_LBR_CTL_STACK_OFFSET 3 +#define ARCH_LBR_CTL_STACK (0x1ull << ARCH_LBR_CTL_STACK_OFFSET) +#define ARCH_LBR_CTL_FILTER_OFFSET 16 +#define ARCH_LBR_CTL_FILTER (0x7full << ARCH_LBR_CTL_FILTER_OFFSET) +#define MSR_ARCH_LBR_DEPTH 0x000014cf +#define MSR_ARCH_LBR_FROM_0 0x00001500 
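
/*
 * An aside, not part of the patch: a minimal sketch of how the packed
 * LBR_INFO_* fields defined above are intended to be decoded from a raw
 * MSR_ARCH_LBR_INFO_x / MSR_LBR_INFO_x value. The helper names are
 * hypothetical; the bit positions follow directly from the definitions
 * above (branch type in bits 59:56, four 2-bit branch counters packed
 * starting at bit 32, cycle count in bits 15:0 qualified by bit 60).
 */
static inline unsigned int lbr_info_br_type(u64 info)
{
	return (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
}

static inline u64 lbr_info_br_cntr(u64 info, unsigned int cntr)
{
	/* cntr must be below LBR_INFO_BR_CNTR_NUM (4) */
	return (info >> (LBR_INFO_BR_CNTR_OFFSET +
			 cntr * LBR_INFO_BR_CNTR_BITS)) & LBR_INFO_BR_CNTR_MASK;
}

static inline u64 lbr_info_cycles(u64 info)
{
	/* The cycle count is only meaningful when the valid bit is set. */
	return (info & LBR_INFO_CYC_CNT_VALID) ? (info & LBR_INFO_CYCLES) : 0;
}
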
+#define MSR_ARCH_LBR_TO_0 0x00001600 +#define MSR_ARCH_LBR_INFO_0 0x00001200 #define MSR_IA32_PEBS_ENABLE 0x000003f1 +#define MSR_PEBS_DATA_CFG 0x000003f2 #define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_PERF_CAPABILITIES 0x00000345 +#define PERF_CAP_METRICS_IDX 15 +#define PERF_CAP_PT_IDX 16 + #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 +#define PERF_CAP_LBR_FMT 0x3f +#define PERF_CAP_PEBS_TRAP BIT_ULL(6) +#define PERF_CAP_ARCH_REG BIT_ULL(7) +#define PERF_CAP_PEBS_FORMAT 0xf00 +#define PERF_CAP_FW_WRITES BIT_ULL(13) +#define PERF_CAP_PEBS_BASELINE BIT_ULL(14) +#define PERF_CAP_PEBS_TIMING_INFO BIT_ULL(17) +#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \ + PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \ + PERF_CAP_PEBS_TIMING_INFO) + +/* Arch PEBS */ +#define MSR_IA32_PEBS_BASE 0x000003f4 +#define MSR_IA32_PEBS_INDEX 0x000003f5 +#define ARCH_PEBS_OFFSET_MASK 0x7fffff +#define ARCH_PEBS_INDEX_WR_SHIFT 4 + +#define ARCH_PEBS_RELOAD 0xffffffff +#define ARCH_PEBS_CNTR_ALLOW BIT_ULL(35) +#define ARCH_PEBS_CNTR_GP BIT_ULL(36) +#define ARCH_PEBS_CNTR_FIXED BIT_ULL(37) +#define ARCH_PEBS_CNTR_METRICS BIT_ULL(38) +#define ARCH_PEBS_LBR_SHIFT 40 +#define ARCH_PEBS_LBR (0x3ull << ARCH_PEBS_LBR_SHIFT) +#define ARCH_PEBS_VECR_XMM BIT_ULL(49) +#define ARCH_PEBS_GPR BIT_ULL(61) +#define ARCH_PEBS_AUX BIT_ULL(62) +#define ARCH_PEBS_EN BIT_ULL(63) +#define ARCH_PEBS_CNTR_MASK (ARCH_PEBS_CNTR_GP | ARCH_PEBS_CNTR_FIXED | \ + ARCH_PEBS_CNTR_METRICS) + #define MSR_IA32_RTIT_CTL 0x00000570 #define RTIT_CTL_TRACEEN BIT(0) #define RTIT_CTL_CYCLEACC BIT(1) @@ -135,6 +366,8 @@ #define RTIT_CTL_DISRETC BIT(11) #define RTIT_CTL_PTW_EN BIT(12) #define RTIT_CTL_BRANCH_EN BIT(13) +#define RTIT_CTL_EVENT_EN BIT(31) +#define RTIT_CTL_NOTNT BIT_ULL(55) #define RTIT_CTL_MTC_RANGE_OFFSET 14 #define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET) #define RTIT_CTL_CYC_THRESH_OFFSET 19 @@ -185,16 +418,27 @@ #define MSR_IA32_CR_PAT 0x00000277 +#define PAT_VALUE(p0, p1, p2, p3, p4, p5, p6, p7) \ + ((X86_MEMTYPE_ ## p0) | (X86_MEMTYPE_ ## p1 << 8) | \ + (X86_MEMTYPE_ ## p2 << 16) | (X86_MEMTYPE_ ## p3 << 24) | \ + (X86_MEMTYPE_ ## p4 << 32) | (X86_MEMTYPE_ ## p5 << 40) | \ + (X86_MEMTYPE_ ## p6 << 48) | (X86_MEMTYPE_ ## p7 << 56)) + #define MSR_IA32_DEBUGCTLMSR 0x000001d9 #define MSR_IA32_LASTBRANCHFROMIP 0x000001db #define MSR_IA32_LASTBRANCHTOIP 0x000001dc #define MSR_IA32_LASTINTFROMIP 0x000001dd #define MSR_IA32_LASTINTTOIP 0x000001de +#define MSR_IA32_PASID 0x00000d93 +#define MSR_IA32_PASID_VALID BIT_ULL(31) + /* DEBUGCTLMSR bits (others vary by model): */ -#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_LBR_BIT 0 /* last branch recording */ +#define DEBUGCTLMSR_LBR (1UL << DEBUGCTLMSR_LBR_BIT) #define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ +#define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) #define DEBUGCTLMSR_TR (1UL << 6) #define DEBUGCTLMSR_BTS (1UL << 7) #define DEBUGCTLMSR_BTINT (1UL << 8) @@ -204,11 +448,10 @@ #define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1UL << 12) #define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14 #define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT) +#define DEBUGCTLMSR_RTM_DEBUG BIT(15) #define MSR_PEBS_FRONTEND 0x000003f7 -#define MSR_IA32_POWER_CTL 0x000001fc - #define MSR_IA32_MC0_CTL 0x00000400 #define MSR_IA32_MC0_STATUS 0x00000401 #define MSR_IA32_MC0_ADDR 0x00000402 @@ -238,6 +481,7 @@ /* Run Time Average Power Limiting (RAPL) Interface */ +#define 
MSR_VR_CURRENT_CONFIG 0x00000601 #define MSR_RAPL_POWER_UNIT 0x00000606 #define MSR_PKG_POWER_LIMIT 0x00000610 @@ -259,6 +503,10 @@ #define MSR_PP1_ENERGY_STATUS 0x00000641 #define MSR_PP1_POLICY 0x00000642 +#define MSR_AMD_RAPL_POWER_UNIT 0xc0010299 +#define MSR_AMD_CORE_ENERGY_STATUS 0xc001029a +#define MSR_AMD_PKG_ENERGY_STATUS 0xc001029b + /* Config TDP MSRs */ #define MSR_CONFIG_TDP_NOMINAL 0x00000648 #define MSR_CONFIG_TDP_LEVEL_1 0x00000649 @@ -267,6 +515,7 @@ #define MSR_TURBO_ACTIVATION_RATIO 0x0000064C #define MSR_PLATFORM_ENERGY_STATUS 0x0000064D +#define MSR_SECONDARY_TURBO_RATIO_LIMIT 0x00000650 #define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 #define MSR_PKG_ANY_CORE_C0_RES 0x00000659 @@ -284,11 +533,29 @@ #define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c #define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d - #define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 #define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 #define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 +/* Control-flow Enforcement Technology MSRs */ +#define MSR_IA32_U_CET 0x000006a0 /* user mode cet */ +#define MSR_IA32_S_CET 0x000006a2 /* kernel mode cet */ +#define CET_SHSTK_EN BIT_ULL(0) +#define CET_WRSS_EN BIT_ULL(1) +#define CET_ENDBR_EN BIT_ULL(2) +#define CET_LEG_IW_EN BIT_ULL(3) +#define CET_NO_TRACK_EN BIT_ULL(4) +#define CET_SUPPRESS_DISABLE BIT_ULL(5) +#define CET_RESERVED (BIT_ULL(6) | BIT_ULL(7) | BIT_ULL(8) | BIT_ULL(9)) +#define CET_SUPPRESS BIT_ULL(10) +#define CET_WAIT_ENDBR BIT_ULL(11) + +#define MSR_IA32_PL0_SSP 0x000006a4 /* ring-0 shadow stack pointer */ +#define MSR_IA32_PL1_SSP 0x000006a5 /* ring-1 shadow stack pointer */ +#define MSR_IA32_PL2_SSP 0x000006a6 /* ring-2 shadow stack pointer */ +#define MSR_IA32_PL3_SSP 0x000006a7 /* ring-3 shadow stack pointer */ +#define MSR_IA32_INT_SSP_TAB 0x000006a8 /* exception shadow stack table */ + /* Hardware P state interface */ #define MSR_PPERF 0x0000064e #define MSR_PERF_LIMIT_REASONS 0x0000064f @@ -296,7 +563,7 @@ #define MSR_HWP_CAPABILITIES 0x00000771 #define MSR_HWP_REQUEST_PKG 0x00000772 #define MSR_HWP_INTERRUPT 0x00000773 -#define MSR_HWP_REQUEST 0x00000774 +#define MSR_HWP_REQUEST 0x00000774 #define MSR_HWP_STATUS 0x00000777 /* CPUID.6.EAX */ @@ -313,16 +580,16 @@ #define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff) /* IA32_HWP_REQUEST */ -#define HWP_MIN_PERF(x) (x & 0xff) -#define HWP_MAX_PERF(x) ((x & 0xff) << 8) +#define HWP_MIN_PERF(x) (x & 0xff) +#define HWP_MAX_PERF(x) ((x & 0xff) << 8) #define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) -#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) +#define HWP_ENERGY_PERF_PREFERENCE(x) (((u64)x & 0xff) << 24) #define HWP_EPP_PERFORMANCE 0x00 #define HWP_EPP_BALANCE_PERFORMANCE 0x80 #define HWP_EPP_BALANCE_POWERSAVE 0xC0 #define HWP_EPP_POWERSAVE 0xFF -#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32) -#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42) +#define HWP_ACTIVITY_WINDOW(x) ((u64)(x & 0xff3) << 32) +#define HWP_PACKAGE_CONTROL(x) ((u64)(x & 0x1) << 42) /* IA32_HWP_STATUS */ #define HWP_GUARANTEED_CHANGE(x) (x & 0x1) @@ -358,17 +625,55 @@ /* Alternative perfctr range with full access. */ #define MSR_IA32_PMC0 0x000004c1 -/* AMD64 MSRs. Not complete. See the architecture manual for a more - complete list. 
*/ +/* Auto-reload via MSR instead of DS area */ +#define MSR_RELOAD_PMC0 0x000014c1 +#define MSR_RELOAD_FIXED_CTR0 0x00001309 + +/* V6 PMON MSR range */ +#define MSR_IA32_PMC_V6_GP0_CTR 0x1900 +#define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901 +#define MSR_IA32_PMC_V6_GP0_CFG_B 0x1902 +#define MSR_IA32_PMC_V6_GP0_CFG_C 0x1903 +#define MSR_IA32_PMC_V6_FX0_CTR 0x1980 +#define MSR_IA32_PMC_V6_FX0_CFG_B 0x1982 +#define MSR_IA32_PMC_V6_FX0_CFG_C 0x1983 +#define MSR_IA32_PMC_V6_STEP 4 +/* KeyID partitioning between MKTME and TDX */ +#define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087 + +/* + * AMD64 MSRs. Not complete. See the architecture manual for a more + * complete list. + */ #define MSR_AMD64_PATCH_LEVEL 0x0000008b #define MSR_AMD64_TSC_RATIO 0xc0000104 #define MSR_AMD64_NB_CFG 0xc001001f #define MSR_AMD64_PATCH_LOADER 0xc0010020 +#define MSR_AMD_PERF_CTL 0xc0010062 +#define MSR_AMD_PERF_STATUS 0xc0010063 +#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 +#define MSR_AMD64_GUEST_TSC_FREQ 0xc0010134 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 +#define MSR_AMD_PPIN_CTL 0xc00102f0 +#define MSR_AMD_PPIN 0xc00102f1 +#define MSR_AMD64_CPUID_FN_7 0xc0011002 +#define MSR_AMD64_CPUID_FN_1 0xc0011004 + +#define MSR_AMD64_CPUID_EXT_FEAT 0xc0011005 +#define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT 54 +#define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT BIT_ULL(MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT) + #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 +#define MSR_AMD64_TW_CFG 0xc0011023 + +#define MSR_AMD64_DE_CFG 0xc0011029 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT) +#define MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT 9 + #define MSR_AMD64_BU_CFG2 0xc001102a #define MSR_AMD64_IBSFETCHCTL 0xc0011030 #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 @@ -386,13 +691,111 @@ #define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1) #define MSR_AMD64_IBSCTL 0xc001103a #define MSR_AMD64_IBSBRTARGET 0xc001103b +#define MSR_AMD64_ICIBSEXTDCTL 0xc001103c #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b +#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e +#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f +#define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) - -#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f +#define MSR_AMD64_SEV_ES_ENABLED_BIT 1 +#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT) +#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2 +#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT) +#define MSR_AMD64_SNP_VTOM_BIT 3 +#define MSR_AMD64_SNP_VTOM BIT_ULL(MSR_AMD64_SNP_VTOM_BIT) +#define MSR_AMD64_SNP_REFLECT_VC_BIT 4 +#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(MSR_AMD64_SNP_REFLECT_VC_BIT) +#define MSR_AMD64_SNP_RESTRICTED_INJ_BIT 5 +#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(MSR_AMD64_SNP_RESTRICTED_INJ_BIT) +#define MSR_AMD64_SNP_ALT_INJ_BIT 6 +#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(MSR_AMD64_SNP_ALT_INJ_BIT) +#define MSR_AMD64_SNP_DEBUG_SWAP_BIT 7 +#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(MSR_AMD64_SNP_DEBUG_SWAP_BIT) +#define MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT 8 +#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT) +#define MSR_AMD64_SNP_BTB_ISOLATION_BIT 9 +#define MSR_AMD64_SNP_BTB_ISOLATION 
BIT_ULL(MSR_AMD64_SNP_BTB_ISOLATION_BIT) +#define MSR_AMD64_SNP_VMPL_SSS_BIT 10 +#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(MSR_AMD64_SNP_VMPL_SSS_BIT) +#define MSR_AMD64_SNP_SECURE_TSC_BIT 11 +#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(MSR_AMD64_SNP_SECURE_TSC_BIT) +#define MSR_AMD64_SNP_VMGEXIT_PARAM_BIT 12 +#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(MSR_AMD64_SNP_VMGEXIT_PARAM_BIT) +#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13) +#define MSR_AMD64_SNP_IBS_VIRT_BIT 14 +#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(MSR_AMD64_SNP_IBS_VIRT_BIT) +#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15) +#define MSR_AMD64_SNP_VMSA_REG_PROT_BIT 16 +#define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT) +#define MSR_AMD64_SNP_SMT_PROT_BIT 17 +#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) +#define MSR_AMD64_SNP_SECURE_AVIC_BIT 18 +#define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT) +#define MSR_AMD64_SNP_RESV_BIT 19 +#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) +#define MSR_AMD64_SAVIC_CONTROL 0xc0010138 +#define MSR_AMD64_SAVIC_EN_BIT 0 +#define MSR_AMD64_SAVIC_EN BIT_ULL(MSR_AMD64_SAVIC_EN_BIT) +#define MSR_AMD64_SAVIC_ALLOWEDNMI_BIT 1 +#define MSR_AMD64_SAVIC_ALLOWEDNMI BIT_ULL(MSR_AMD64_SAVIC_ALLOWEDNMI_BIT) +#define MSR_AMD64_RMP_BASE 0xc0010132 +#define MSR_AMD64_RMP_END 0xc0010133 +#define MSR_AMD64_RMP_CFG 0xc0010136 +#define MSR_AMD64_SEG_RMP_ENABLED_BIT 0 +#define MSR_AMD64_SEG_RMP_ENABLED BIT_ULL(MSR_AMD64_SEG_RMP_ENABLED_BIT) +#define MSR_AMD64_RMP_SEGMENT_SHIFT(x) (((x) & GENMASK_ULL(13, 8)) >> 8) + +#define MSR_SVSM_CAA 0xc001f000 + +/* AMD Collaborative Processor Performance Control MSRs */ +#define MSR_AMD_CPPC_CAP1 0xc00102b0 +#define MSR_AMD_CPPC_ENABLE 0xc00102b1 +#define MSR_AMD_CPPC_CAP2 0xc00102b2 +#define MSR_AMD_CPPC_REQ 0xc00102b3 +#define MSR_AMD_CPPC_STATUS 0xc00102b4 + +/* Masks for use with MSR_AMD_CPPC_CAP1 */ +#define AMD_CPPC_LOWEST_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_LOWNONLIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_NOMINAL_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_HIGHEST_PERF_MASK GENMASK(31, 24) + +/* Masks for use with MSR_AMD_CPPC_REQ */ +#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24) + +/* AMD Performance Counter Global Status and Control MSRs */ +#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 +#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 +#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 +#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET 0xc0000303 + +/* AMD Hardware Feedback Support MSRs */ +#define MSR_AMD_WORKLOAD_CLASS_CONFIG 0xc0000500 +#define MSR_AMD_WORKLOAD_CLASS_ID 0xc0000501 +#define MSR_AMD_WORKLOAD_HRST 0xc0000502 + +/* AMD Last Branch Record MSRs */ +#define MSR_AMD64_LBR_SELECT 0xc000010e + +/* Zen4 */ +#define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4 +#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 + +/* Fam 19h MSRs */ +#define MSR_F19H_UMC_PERF_CTL 0xc0010800 +#define MSR_F19H_UMC_PERF_CTR 0xc0010801 + +/* Zen 2 */ +#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 +#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 @@ -406,6 +809,8 @@ #define MSR_F16H_DR0_ADDR_MASK 0xc0011027 /* Fam 15h MSRs */ +#define MSR_F15H_CU_PWR_ACCUMULATOR 0xc001007a +#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b #define MSR_F15H_PERF_CTL 
0xc0010200 #define MSR_F15H_PERF_CTL0 MSR_F15H_PERF_CTL #define MSR_F15H_PERF_CTL1 (MSR_F15H_PERF_CTL + 2) @@ -436,16 +841,20 @@ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c -#define MSR_F10H_DECFG 0xc0011029 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a #define MSR_K8_TOP_MEM2 0xc001001d -#define MSR_K8_SYSCFG 0xc0010010 -#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23 -#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_AMD64_SYSCFG 0xc0010010 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_AMD64_SYSCFG_SNP_EN_BIT 24 +#define MSR_AMD64_SYSCFG_SNP_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT) +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT 25 +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT) +#define MSR_AMD64_SYSCFG_MFDM_BIT 19 +#define MSR_AMD64_SYSCFG_MFDM BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT) + #define MSR_K8_INT_PENDING_MSG 0xc0010055 /* C1E active bits in int pending message */ #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 @@ -468,8 +877,13 @@ #define MSR_K7_HWCR 0xc0010015 #define MSR_K7_HWCR_SMMLOCK_BIT 0 #define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT) +#define MSR_K7_HWCR_IRPERF_EN_BIT 30 +#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) +#define MSR_K7_HWCR_CPUID_USER_DIS_BIT 35 #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 +#define MSR_K7_HWCR_CPB_DIS_BIT 25 +#define MSR_K7_HWCR_CPB_DIS BIT_ULL(MSR_K7_HWCR_CPB_DIS_BIT) /* K6 MSRs */ #define MSR_K6_WHCR 0xc0000082 @@ -514,38 +928,55 @@ #define MSR_IA32_EBL_CR_POWERON 0x0000002a #define MSR_EBC_FREQUENCY_ID 0x0000002c #define MSR_SMI_COUNT 0x00000034 -#define MSR_IA32_FEATURE_CONTROL 0x0000003a + +/* Referred to as IA32_FEATURE_CONTROL in Intel's SDM. 
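
The AMD_CPPC_*_MASK definitions earlier in this hunk are plain GENMASK()s, so FIELD_PREP() from <linux/bitfield.h> is the natural way to build a request. A hedged sketch; the perf numbers are made up, not taken from this patch:

	#include <linux/bitfield.h>

	static void cppc_req_example(void)
	{
		u64 req = FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, 255) |
			  FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, 16) |
			  FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0) |
			  FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, 0x80);

		wrmsrq(MSR_AMD_CPPC_REQ, req);
	}
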
*/ +#define MSR_IA32_FEAT_CTL 0x0000003a +#define FEAT_CTL_LOCKED BIT(0) +#define FEAT_CTL_VMX_ENABLED_INSIDE_SMX BIT(1) +#define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX BIT(2) +#define FEAT_CTL_SGX_LC_ENABLED BIT(17) +#define FEAT_CTL_SGX_ENABLED BIT(18) +#define FEAT_CTL_LMCE_ENABLED BIT(20) + #define MSR_IA32_TSC_ADJUST 0x0000003b #define MSR_IA32_BNDCFGS 0x00000d90 #define MSR_IA32_BNDCFGS_RSVD 0x00000ffc +#define MSR_IA32_XFD 0x000001c4 +#define MSR_IA32_XFD_ERR 0x000001c5 #define MSR_IA32_XSS 0x00000da0 -#define FEATURE_CONTROL_LOCKED (1<<0) -#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) -#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) -#define FEATURE_CONTROL_LMCE (1<<20) - #define MSR_IA32_APICBASE 0x0000001b #define MSR_IA32_APICBASE_BSP (1<<8) #define MSR_IA32_APICBASE_ENABLE (1<<11) #define MSR_IA32_APICBASE_BASE (0xfffff<<12) -#define MSR_IA32_TSCDEADLINE 0x000006e0 - #define MSR_IA32_UCODE_WRITE 0x00000079 + +#define MSR_IA32_MCU_ENUMERATION 0x0000007b +#define MCU_STAGING BIT(4) + #define MSR_IA32_UCODE_REV 0x0000008b +/* Intel SGX Launch Enclave Public Key Hash MSRs */ +#define MSR_IA32_SGXLEPUBKEYHASH0 0x0000008C +#define MSR_IA32_SGXLEPUBKEYHASH1 0x0000008D +#define MSR_IA32_SGXLEPUBKEYHASH2 0x0000008E +#define MSR_IA32_SGXLEPUBKEYHASH3 0x0000008F + #define MSR_IA32_SMM_MONITOR_CTL 0x0000009b #define MSR_IA32_SMBASE 0x0000009e #define MSR_IA32_PERF_STATUS 0x00000198 #define MSR_IA32_PERF_CTL 0x00000199 #define INTEL_PERF_CTL_MASK 0xffff -#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 -#define MSR_AMD_PERF_STATUS 0xc0010063 -#define MSR_AMD_PERF_CTL 0xc0010062 + +/* AMD Branch Sampling configuration */ +#define MSR_AMD_DBG_EXTN_CFG 0xc000010f +#define MSR_AMD_SAMP_BR_FROM 0xc0010300 + +#define DBG_EXTN_CFG_LBRV2EN BIT_ULL(6) #define MSR_IA32_MPERF 0x000000e7 #define MSR_IA32_APERF 0x000000e8 @@ -577,6 +1008,7 @@ #define ENERGY_PERF_BIAS_PERFORMANCE 0 #define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4 #define ENERGY_PERF_BIAS_NORMAL 6 +#define ENERGY_PERF_BIAS_NORMAL_POWERSAVE 7 #define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8 #define ENERGY_PERF_BIAS_POWERSAVE 15 @@ -584,12 +1016,14 @@ #define PACKAGE_THERM_STATUS_PROCHOT (1 << 0) #define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10) +#define PACKAGE_THERM_STATUS_HFI_UPDATED (1 << 26) #define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2 #define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0) #define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1) #define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24) +#define PACKAGE_THERM_INT_HFI_ENABLE (1 << 25) /* Thermal Thresholds Support */ #define THERM_INT_THRESHOLD0_ENABLE (1 << 15) @@ -666,6 +1100,16 @@ #define MSR_IA32_TSC_DEADLINE 0x000006E0 + +#define MSR_TSX_FORCE_ABORT 0x0000010F + +#define MSR_TFA_RTM_FORCE_ABORT_BIT 0 +#define MSR_TFA_RTM_FORCE_ABORT BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT) +#define MSR_TFA_TSX_CPUID_CLEAR_BIT 1 +#define MSR_TFA_TSX_CPUID_CLEAR BIT_ULL(MSR_TFA_TSX_CPUID_CLEAR_BIT) +#define MSR_TFA_SDV_ENABLE_RTM_BIT 2 +#define MSR_TFA_SDV_ENABLE_RTM BIT_ULL(MSR_TFA_SDV_ENABLE_RTM_BIT) + /* P4/Xeon+ specific */ #define MSR_IA32_MCG_EAX 0x00000180 #define MSR_IA32_MCG_EBX 0x00000181 @@ -770,11 +1214,22 @@ #define MSR_CORE_PERF_FIXED_CTR0 0x00000309 #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a #define MSR_CORE_PERF_FIXED_CTR2 0x0000030b +#define MSR_CORE_PERF_FIXED_CTR3 0x0000030c #define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d #define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e #define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f #define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 +#define MSR_PERF_METRICS 
0x00000329 + +/* PERF_GLOBAL_OVF_CTL bits */ +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55 +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT) +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT 62 +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT) +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT 63 +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT) + /* Geode defined MSRs */ #define MSR_GEODE_BUSCONT_CONF0 0x00001900 @@ -797,24 +1252,48 @@ #define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 #define MSR_IA32_VMX_VMFUNC 0x00000491 +#define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492 + +#define MSR_IA32_MCU_STAGING_MBOX_ADDR 0x000007a5 + +/* Resctrl MSRs: */ +/* - Intel: */ +#define MSR_IA32_L3_QOS_CFG 0xc81 +#define MSR_IA32_L2_QOS_CFG 0xc82 +#define MSR_IA32_QM_EVTSEL 0xc8d +#define MSR_IA32_QM_CTR 0xc8e +#define MSR_IA32_PQR_ASSOC 0xc8f +#define MSR_IA32_L3_CBM_BASE 0xc90 +#define MSR_RMID_SNC_CONFIG 0xca0 +#define MSR_IA32_L2_CBM_BASE 0xd10 +#define MSR_IA32_MBA_THRTL_BASE 0xd50 + +/* - AMD: */ +#define MSR_IA32_MBA_BW_BASE 0xc0000200 +#define MSR_IA32_SMBA_BW_BASE 0xc0000280 +#define MSR_IA32_L3_QOS_ABMC_CFG 0xc00003fd +#define MSR_IA32_L3_QOS_EXT_CFG 0xc00003ff +#define MSR_IA32_EVT_CFG_BASE 0xc0000400 -/* VMX_BASIC bits and bitmasks */ -#define VMX_BASIC_VMCS_SIZE_SHIFT 32 -#define VMX_BASIC_TRUE_CTLS (1ULL << 55) -#define VMX_BASIC_64 0x0001000000000000LLU -#define VMX_BASIC_MEM_TYPE_SHIFT 50 -#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU -#define VMX_BASIC_MEM_TYPE_WB 6LLU -#define VMX_BASIC_INOUT 0x0040000000000000LLU - -/* MSR_IA32_VMX_MISC bits */ -#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14) -#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) -#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F /* AMD-V MSRs */ - #define MSR_VM_CR 0xc0010114 #define MSR_VM_IGNNE 0xc0010115 #define MSR_VM_HSAVE_PA 0xc0010117 +#define SVM_VM_CR_VALID_MASK 0x001fULL +#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL +#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL + +/* Hardware Feedback Interface */ +#define MSR_IA32_HW_FEEDBACK_PTR 0x17d0 +#define MSR_IA32_HW_FEEDBACK_CONFIG 0x17d1 + +/* x2APIC locked status */ +#define MSR_IA32_XAPIC_DISABLE_STATUS 0xBD +#define LEGACY_XAPIC_DISABLED BIT(0) /* + * x2APIC mode is locked and + * disabling x2APIC will cause + * a #GP + */ + #endif /* _ASM_X86_MSR_INDEX_H */ diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 91e4cf189914..9c2ea29e12a9 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -4,28 +4,22 @@ #include "msr-index.h" -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/asm.h> #include <asm/errno.h> #include <asm/cpumask.h> #include <uapi/asm/msr.h> +#include <asm/shared/msr.h> -struct msr { - union { - struct { - u32 l; - u32 h; - }; - u64 q; - }; -}; +#include <linux/types.h> +#include <linux/percpu.h> struct msr_info { - u32 msr_no; - struct msr reg; - struct msr *msrs; - int err; + u32 msr_no; + struct msr reg; + struct msr __percpu *msrs; + int err; }; struct msr_regs_info { @@ -44,41 +38,22 @@ struct saved_msrs { }; /* - * both i386 and x86_64 returns 64-bit value in edx:eax, but gcc's "A" - * constraint has different meanings. For i386, "A" means exactly - * edx:eax, while for x86_64 it doesn't mean rdx:rax or edx:eax. Instead, - * it means rax *or* rdx. 
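
A hedged sketch of probing the x2APIC lock status defined at the end of the msr-index.h hunk above. The helper name is hypothetical; rdmsrq_safe() is introduced in the msr.h hunk below and tolerates the MSR being absent:

	static bool xapic_locked_example(void)
	{
		u64 status;

		if (rdmsrq_safe(MSR_IA32_XAPIC_DISABLE_STATUS, &status))
			return false;	/* MSR not implemented on this part */

		return status & LEGACY_XAPIC_DISABLED;
	}
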
- */ -#ifdef CONFIG_X86_64 -/* Using 64-bit values saves one instruction clearing the high half of low */ -#define DECLARE_ARGS(val, low, high) unsigned long low, high -#define EAX_EDX_VAL(val, low, high) ((low) | (high) << 32) -#define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high) -#else -#define DECLARE_ARGS(val, low, high) unsigned long long val -#define EAX_EDX_VAL(val, low, high) (val) -#define EAX_EDX_RET(val, low, high) "=A" (val) -#endif - -#ifdef CONFIG_TRACEPOINTS -/* * Be very careful with includes. This header is prone to include loops. */ #include <asm/atomic.h> #include <linux/tracepoint-defs.h> -extern struct tracepoint __tracepoint_read_msr; -extern struct tracepoint __tracepoint_write_msr; -extern struct tracepoint __tracepoint_rdpmc; -#define msr_tracepoint_active(t) static_key_false(&(t).key) -extern void do_trace_write_msr(unsigned int msr, u64 val, int failed); -extern void do_trace_read_msr(unsigned int msr, u64 val, int failed); -extern void do_trace_rdpmc(unsigned int msr, u64 val, int failed); +#ifdef CONFIG_TRACEPOINTS +DECLARE_TRACEPOINT(read_msr); +DECLARE_TRACEPOINT(write_msr); +DECLARE_TRACEPOINT(rdpmc); +extern void do_trace_write_msr(u32 msr, u64 val, int failed); +extern void do_trace_read_msr(u32 msr, u64 val, int failed); +extern void do_trace_rdpmc(u32 msr, u64 val, int failed); #else -#define msr_tracepoint_active(t) false -static inline void do_trace_write_msr(unsigned int msr, u64 val, int failed) {} -static inline void do_trace_read_msr(unsigned int msr, u64 val, int failed) {} -static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {} +static inline void do_trace_write_msr(u32 msr, u64 val, int failed) {} +static inline void do_trace_read_msr(u32 msr, u64 val, int failed) {} +static inline void do_trace_rdpmc(u32 msr, u64 val, int failed) {} #endif /* @@ -88,24 +63,24 @@ static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {} * think of extending them - you will be slapped with a stinking trout or a frozen * shark will reach you, wherever you are! You've been warned. 
*/ -static inline unsigned long long notrace __rdmsr(unsigned int msr) +static __always_inline u64 __rdmsr(u32 msr) { - DECLARE_ARGS(val, low, high); + EAX_EDX_DECLARE_ARGS(val, low, high); asm volatile("1: rdmsr\n" "2:\n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe) + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR) : EAX_EDX_RET(val, low, high) : "c" (msr)); return EAX_EDX_VAL(val, low, high); } -static inline void notrace __wrmsr(unsigned int msr, u32 low, u32 high) +static __always_inline void __wrmsrq(u32 msr, u64 val) { asm volatile("1: wrmsr\n" "2:\n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe) - : : "c" (msr), "a"(low), "d" (high) : "memory"); + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR) + : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)) : "memory"); } #define native_rdmsr(msr, val1, val2) \ @@ -115,129 +90,81 @@ do { \ (void)((val2) = (u32)(__val >> 32)); \ } while (0) +static __always_inline u64 native_rdmsrq(u32 msr) +{ + return __rdmsr(msr); +} + #define native_wrmsr(msr, low, high) \ - __wrmsr(msr, low, high) + __wrmsrq((msr), (u64)(high) << 32 | (low)) -#define native_wrmsrl(msr, val) \ - __wrmsr((msr), (u32)((u64)(val)), \ - (u32)((u64)(val) >> 32)) +#define native_wrmsrq(msr, val) \ + __wrmsrq((msr), (val)) -static inline unsigned long long native_read_msr(unsigned int msr) +static inline u64 native_read_msr(u32 msr) { - unsigned long long val; + u64 val; val = __rdmsr(msr); - if (msr_tracepoint_active(__tracepoint_read_msr)) + if (tracepoint_enabled(read_msr)) do_trace_read_msr(msr, val, 0); return val; } -static inline unsigned long long native_read_msr_safe(unsigned int msr, - int *err) +static inline int native_read_msr_safe(u32 msr, u64 *p) { - DECLARE_ARGS(val, low, high); - - asm volatile("2: rdmsr ; xor %[err],%[err]\n" - "1:\n\t" - ".section .fixup,\"ax\"\n\t" - "3: mov %[fault],%[err]\n\t" - "xorl %%eax, %%eax\n\t" - "xorl %%edx, %%edx\n\t" - "jmp 1b\n\t" - ".previous\n\t" - _ASM_EXTABLE(2b, 3b) - : [err] "=r" (*err), EAX_EDX_RET(val, low, high) - : "c" (msr), [fault] "i" (-EIO)); - if (msr_tracepoint_active(__tracepoint_read_msr)) - do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err); - return EAX_EDX_VAL(val, low, high); + int err; + EAX_EDX_DECLARE_ARGS(val, low, high); + + asm volatile("1: rdmsr ; xor %[err],%[err]\n" + "2:\n\t" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_RDMSR_SAFE, %[err]) + : [err] "=r" (err), EAX_EDX_RET(val, low, high) + : "c" (msr)); + if (tracepoint_enabled(read_msr)) + do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), err); + + *p = EAX_EDX_VAL(val, low, high); + + return err; } /* Can be uninlined because referenced by paravirt */ -static inline void notrace -native_write_msr(unsigned int msr, u32 low, u32 high) +static inline void notrace native_write_msr(u32 msr, u64 val) { - __wrmsr(msr, low, high); + native_wrmsrq(msr, val); - if (msr_tracepoint_active(__tracepoint_write_msr)) - do_trace_write_msr(msr, ((u64)high << 32 | low), 0); + if (tracepoint_enabled(write_msr)) + do_trace_write_msr(msr, val, 0); } /* Can be uninlined because referenced by paravirt */ -static inline int notrace -native_write_msr_safe(unsigned int msr, u32 low, u32 high) +static inline int notrace native_write_msr_safe(u32 msr, u64 val) { int err; - asm volatile("2: wrmsr ; xor %[err],%[err]\n" - "1:\n\t" - ".section .fixup,\"ax\"\n\t" - "3: mov %[fault],%[err] ; jmp 1b\n\t" - ".previous\n\t" - _ASM_EXTABLE(2b, 3b) + asm volatile("1: wrmsr ; xor %[err],%[err]\n" + "2:\n\t" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_WRMSR_SAFE, %[err]) : [err] 
"=a" (err) - : "c" (msr), "0" (low), "d" (high), - [fault] "i" (-EIO) + : "c" (msr), "0" ((u32)val), "d" ((u32)(val >> 32)) : "memory"); - if (msr_tracepoint_active(__tracepoint_write_msr)) - do_trace_write_msr(msr, ((u64)high << 32 | low), err); + if (tracepoint_enabled(write_msr)) + do_trace_write_msr(msr, val, err); return err; } extern int rdmsr_safe_regs(u32 regs[8]); extern int wrmsr_safe_regs(u32 regs[8]); -/** - * rdtsc() - returns the current TSC without ordering constraints - * - * rdtsc() returns the result of RDTSC as a 64-bit integer. The - * only ordering constraint it supplies is the ordering implied by - * "asm volatile": it will put the RDTSC in the place you expect. The - * CPU can and will speculatively execute that RDTSC, though, so the - * results can be non-monotonic if compared on different CPUs. - */ -static __always_inline unsigned long long rdtsc(void) +static inline u64 native_read_pmc(int counter) { - DECLARE_ARGS(val, low, high); - - asm volatile("rdtsc" : EAX_EDX_RET(val, low, high)); - - return EAX_EDX_VAL(val, low, high); -} - -/** - * rdtsc_ordered() - read the current TSC in program order - * - * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer. - * It is ordered like a load to a global in-memory counter. It should - * be impossible to observe non-monotonic rdtsc_unordered() behavior - * across multiple CPUs as long as the TSC is synced. - */ -static __always_inline unsigned long long rdtsc_ordered(void) -{ - /* - * The RDTSC instruction is not ordered relative to memory - * access. The Intel SDM and the AMD APM are both vague on this - * point, but empirically an RDTSC instruction can be - * speculatively executed before prior loads. An RDTSC - * immediately after an appropriate barrier appears to be - * ordered as a normal load, that is, it provides the same - * ordering guarantees as reading from a global memory location - * that some other imaginary CPU is updating continuously with a - * time stamp. 
- */ - barrier_nospec(); - return rdtsc(); -} - -static inline unsigned long long native_read_pmc(int counter) -{ - DECLARE_ARGS(val, low, high); + EAX_EDX_DECLARE_ARGS(val, low, high); asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter)); - if (msr_tracepoint_active(__tracepoint_rdpmc)) + if (tracepoint_enabled(rdpmc)) do_trace_rdpmc(counter, EAX_EDX_VAL(val, low, high), 0); return EAX_EDX_VAL(val, low, high); } @@ -259,82 +186,86 @@ do { \ (void)((high) = (u32)(__val >> 32)); \ } while (0) -static inline void wrmsr(unsigned int msr, u32 low, u32 high) +static inline void wrmsr(u32 msr, u32 low, u32 high) { - native_write_msr(msr, low, high); + native_write_msr(msr, (u64)high << 32 | low); } -#define rdmsrl(msr, val) \ +#define rdmsrq(msr, val) \ ((val) = native_read_msr((msr))) -static inline void wrmsrl(unsigned int msr, u64 val) +static inline void wrmsrq(u32 msr, u64 val) { - native_write_msr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32)); + native_write_msr(msr, val); } /* wrmsr with exception handling */ -static inline int wrmsr_safe(unsigned int msr, u32 low, u32 high) +static inline int wrmsrq_safe(u32 msr, u64 val) { - return native_write_msr_safe(msr, low, high); + return native_write_msr_safe(msr, val); } /* rdmsr with exception handling */ #define rdmsr_safe(msr, low, high) \ ({ \ - int __err; \ - u64 __val = native_read_msr_safe((msr), &__err); \ + u64 __val; \ + int __err = native_read_msr_safe((msr), &__val); \ (*low) = (u32)__val; \ (*high) = (u32)(__val >> 32); \ __err; \ }) -static inline int rdmsrl_safe(unsigned int msr, unsigned long long *p) +static inline int rdmsrq_safe(u32 msr, u64 *p) { - int err; + return native_read_msr_safe(msr, p); +} - *p = native_read_msr_safe(msr, &err); - return err; +static __always_inline u64 rdpmc(int counter) +{ + return native_read_pmc(counter); } -#define rdpmc(counter, low, high) \ -do { \ - u64 _l = native_read_pmc((counter)); \ - (low) = (u32)_l; \ - (high) = (u32)(_l >> 32); \ -} while (0) +#endif /* !CONFIG_PARAVIRT_XXL */ -#define rdpmcl(counter, val) ((val) = native_read_pmc(counter)) +/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */ +#define ASM_WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6) -#endif /* !CONFIG_PARAVIRT_XXL */ +/* Non-serializing WRMSR, when available. Falls back to a serializing WRMSR. */ +static __always_inline void wrmsrns(u32 msr, u64 val) +{ + /* + * WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant + * DS prefix to avoid a trailing NOP. 
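
Taken together, the msr.h hunks above change the primary C interface: the "l"-suffixed helpers become "q"-suffixed, the safe variants return the error code and hand the value back through a pointer, and rdpmc() returns the full 64-bit count. A sketch of the resulting usage; the MSR and counter choices here are arbitrary:

	static void msr_interface_example(void)
	{
		u64 adj, rev, count;

		rdmsrq(MSR_IA32_TSC_ADJUST, adj);	/* was rdmsrl() */
		wrmsrq(MSR_IA32_TSC_ADJUST, adj);	/* was wrmsrl() */

		if (rdmsrq_safe(MSR_IA32_UCODE_REV, &rev))	/* 0 on success */
			rev = 0;

		count = rdpmc(0);	/* previously filled two u32 halves */
	}
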
+ */ + asm volatile("1: " ALTERNATIVE("ds wrmsr", ASM_WRMSRNS, X86_FEATURE_WRMSRNS) + "2: " _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR) + : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32))); +} /* - * 64-bit version of wrmsr_safe(): + * Dual u32 version of wrmsrq_safe(): */ -static inline int wrmsrl_safe(u32 msr, u64 val) +static inline int wrmsr_safe(u32 msr, u32 low, u32 high) { - return wrmsr_safe(msr, (u32)val, (u32)(val >> 32)); + return wrmsrq_safe(msr, (u64)high << 32 | low); } -#define write_tsc(low, high) wrmsr(MSR_IA32_TSC, (low), (high)) - -#define write_rdtscp_aux(val) wrmsr(MSR_TSC_AUX, (val), 0) - -struct msr *msrs_alloc(void); -void msrs_free(struct msr *msrs); +struct msr __percpu *msrs_alloc(void); +void msrs_free(struct msr __percpu *msrs); int msr_set_bit(u32 msr, u8 bit); int msr_clear_bit(u32 msr, u8 bit); #ifdef CONFIG_SMP int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); -int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); -int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q); -void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); -void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); +int rdmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); +int wrmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 q); +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs); +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs); int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); -int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); -int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q); +int rdmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); +int wrmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q); int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); #else /* CONFIG_SMP */ @@ -348,25 +279,25 @@ static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) wrmsr(msr_no, l, h); return 0; } -static inline int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) +static inline int rdmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) { - rdmsrl(msr_no, *q); + rdmsrq(msr_no, *q); return 0; } -static inline int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q) +static inline int wrmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 q) { - wrmsrl(msr_no, q); + wrmsrq(msr_no, q); return 0; } static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no, - struct msr *msrs) + struct msr __percpu *msrs) { - rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h)); + rdmsr_on_cpu(0, msr_no, raw_cpu_ptr(&msrs->l), raw_cpu_ptr(&msrs->h)); } static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no, - struct msr *msrs) + struct msr __percpu *msrs) { - wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h); + wrmsr_on_cpu(0, msr_no, raw_cpu_read(msrs->l), raw_cpu_read(msrs->h)); } static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) @@ -377,13 +308,13 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { return wrmsr_safe(msr_no, l, h); } -static inline int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) +static inline int rdmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) { - return rdmsrl_safe(msr_no, q); + return rdmsrq_safe(msr_no, q); } -static inline 
int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q) +static inline int wrmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q) { - return wrmsrl_safe(msr_no, q); + return wrmsrq_safe(msr_no, q); } static inline int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) { @@ -394,5 +325,11 @@ static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) return wrmsr_safe_regs(regs); } #endif /* CONFIG_SMP */ -#endif /* __ASSEMBLY__ */ + +/* Compatibility wrappers: */ +#define rdmsrl(msr, val) rdmsrq(msr, val) +#define wrmsrl(msr, val) wrmsrq(msr, val) +#define rdmsrl_on_cpu(cpu, msr, q) rdmsrq_on_cpu(cpu, msr, q) + +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_MSR_H */ diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index dbff1456d215..76b95bd1a405 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -1,21 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.0+ */ /* Generic MTRR (Memory Type Range Register) ioctls. Copyright (C) 1997-1999 Richard Gooch - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with this library; if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - Richard Gooch may be reached by email at rgooch@atnf.csiro.au The postal address is: Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. @@ -23,15 +10,43 @@ #ifndef _ASM_X86_MTRR_H #define _ASM_X86_MTRR_H +#include <linux/bits.h> #include <uapi/asm/mtrr.h> -#include <asm/pat.h> +/* Defines for hardware MTRR registers. */ +#define MTRR_CAP_VCNT GENMASK(7, 0) +#define MTRR_CAP_FIX BIT_MASK(8) +#define MTRR_CAP_WC BIT_MASK(10) + +#define MTRR_DEF_TYPE_TYPE GENMASK(7, 0) +#define MTRR_DEF_TYPE_FE BIT_MASK(10) +#define MTRR_DEF_TYPE_E BIT_MASK(11) + +#define MTRR_DEF_TYPE_ENABLE (MTRR_DEF_TYPE_FE | MTRR_DEF_TYPE_E) +#define MTRR_DEF_TYPE_DISABLE ~(MTRR_DEF_TYPE_TYPE | MTRR_DEF_TYPE_ENABLE) + +#define MTRR_PHYSBASE_TYPE GENMASK(7, 0) +#define MTRR_PHYSBASE_RSVD GENMASK(11, 8) + +#define MTRR_PHYSMASK_RSVD GENMASK(10, 0) +#define MTRR_PHYSMASK_V BIT_MASK(11) + +struct mtrr_state_type { + struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; + mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; + unsigned char enabled; + bool have_fixed; + mtrr_type def_type; +}; /* * The following functions are for use by other drivers that cannot use * arch_phys_wc_add and arch_phys_wc_del. 
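
A hedged sketch of the new MTRR register macros above; MSR_MTRRcap is assumed to come from msr-index.h, and the helper is hypothetical:

	static void mtrr_cap_example(void)
	{
		u64 cap;

		rdmsrq(MSR_MTRRcap, cap);
		pr_info("MTRR: %llu variable ranges, fixed %s, WC %s\n",
			cap & MTRR_CAP_VCNT,
			(cap & MTRR_CAP_FIX) ? "yes" : "no",
			(cap & MTRR_CAP_WC) ? "yes" : "no");
	}
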
*/ # ifdef CONFIG_MTRR +void mtrr_bp_init(void); +void guest_force_mtrr_state(struct mtrr_var_range *var, unsigned int num_var, + mtrr_type def_type); extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform); extern void mtrr_save_fixed_ranges(void *); extern void mtrr_save_state(void); @@ -41,21 +56,27 @@ extern int mtrr_add_page(unsigned long base, unsigned long size, unsigned int type, bool increment); extern int mtrr_del(int reg, unsigned long base, unsigned long size); extern int mtrr_del_page(int reg, unsigned long base, unsigned long size); -extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); -extern void mtrr_ap_init(void); -extern void mtrr_bp_init(void); -extern void set_mtrr_aps_delayed_init(void); -extern void mtrr_aps_init(void); -extern void mtrr_bp_restore(void); extern int mtrr_trim_uncached_memory(unsigned long end_pfn); extern int amd_special_default_mtrr(void); +void mtrr_disable(void); +void mtrr_enable(void); +void mtrr_generic_set_state(void); # else +static inline void guest_force_mtrr_state(struct mtrr_var_range *var, + unsigned int num_var, + mtrr_type def_type) +{ +} + static inline u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform) { /* - * Return no-MTRRs: + * Return the default MTRR type, without any known other types in + * that range. */ - return MTRR_TYPE_INVALID; + *uniform = 1; + + return MTRR_TYPE_UNCACHABLE; } #define mtrr_save_fixed_ranges(arg) do {} while (0) #define mtrr_save_state() do {} while (0) @@ -81,18 +102,10 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn) { return 0; } -static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) -{ -} -static inline void mtrr_bp_init(void) -{ - pat_disable("MTRRs disabled, skipping PAT initialization too."); -} - -#define mtrr_ap_init() do {} while (0) -#define set_mtrr_aps_delayed_init() do {} while (0) -#define mtrr_aps_init() do {} while (0) -#define mtrr_bp_restore() do {} while (0) +#define mtrr_bp_init() do {} while (0) +#define mtrr_disable() do {} while (0) +#define mtrr_enable() do {} while (0) +#define mtrr_generic_set_state() do {} while (0) # endif #ifdef CONFIG_COMPAT @@ -127,7 +140,8 @@ struct mtrr_gentry32 { #endif /* CONFIG_COMPAT */ /* Bit fields for enabled in struct mtrr_state_type */ -#define MTRR_STATE_MTRR_FIXED_ENABLED 0x01 -#define MTRR_STATE_MTRR_ENABLED 0x02 +#define MTRR_STATE_SHIFT 10 +#define MTRR_STATE_MTRR_FIXED_ENABLED (MTRR_DEF_TYPE_FE >> MTRR_STATE_SHIFT) +#define MTRR_STATE_MTRR_ENABLED (MTRR_DEF_TYPE_E >> MTRR_STATE_SHIFT) #endif /* _ASM_X86_MTRR_H */ diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 39a2fb29378a..e4815e15dc9a 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -6,43 +6,46 @@ #include <linux/sched/idle.h> #include <asm/cpufeature.h> +#include <asm/nospec-branch.h> #define MWAIT_SUBSTATE_MASK 0xf #define MWAIT_CSTATE_MASK 0xf #define MWAIT_SUBSTATE_SIZE 4 #define MWAIT_HINT2CSTATE(hint) (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) #define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK) +#define MWAIT_C1_SUBSTATE_MASK 0xf0 -#define CPUID_MWAIT_LEAF 5 #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1 #define CPUID5_ECX_INTERRUPT_BREAK 0x2 #define MWAIT_ECX_INTERRUPT_BREAK 0x1 #define MWAITX_ECX_TIMER_ENABLE BIT(1) -#define MWAITX_MAX_LOOPS ((u32)-1) -#define MWAITX_DISABLE_CSTATES 0xf +#define MWAITX_MAX_WAIT_CYCLES UINT_MAX +#define MWAITX_DISABLE_CSTATES 0xf0 +#define TPAUSE_C01_STATE 1 +#define TPAUSE_C02_STATE 0 -static inline void __monitor(const 
void *eax, unsigned long ecx, - unsigned long edx) +static __always_inline void __monitor(const void *eax, u32 ecx, u32 edx) { - /* "monitor %eax, %ecx, %edx;" */ - asm volatile(".byte 0x0f, 0x01, 0xc8;" - :: "a" (eax), "c" (ecx), "d"(edx)); + /* + * Use the instruction mnemonic with implicit operands, as the LLVM + * assembler fails to assemble the mnemonic with explicit operands: + */ + asm volatile("monitor" :: "a" (eax), "c" (ecx), "d" (edx)); } -static inline void __monitorx(const void *eax, unsigned long ecx, - unsigned long edx) +static __always_inline void __monitorx(const void *eax, u32 ecx, u32 edx) { - /* "monitorx %eax, %ecx, %edx;" */ - asm volatile(".byte 0x0f, 0x01, 0xfa;" - :: "a" (eax), "c" (ecx), "d"(edx)); + asm volatile("monitorx" :: "a" (eax), "c" (ecx), "d"(edx)); } -static inline void __mwait(unsigned long eax, unsigned long ecx) +static __always_inline void __mwait(u32 eax, u32 ecx) { - /* "mwait %eax, %ecx;" */ - asm volatile(".byte 0x0f, 0x01, 0xc9;" - :: "a" (eax), "c" (ecx)); + /* + * Use the instruction mnemonic with implicit operands, as the LLVM + * assembler fails to assemble the mnemonic with explicit operands: + */ + asm volatile("mwait" :: "a" (eax), "c" (ecx)); } /* @@ -71,20 +74,26 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) * EAX (logical) address to monitor * ECX #GP if not zero */ -static inline void __mwaitx(unsigned long eax, unsigned long ebx, - unsigned long ecx) +static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx) { - /* "mwaitx %eax, %ebx, %ecx;" */ - asm volatile(".byte 0x0f, 0x01, 0xfb;" - :: "a" (eax), "b" (ebx), "c" (ecx)); + /* No need for TSA buffer clearing on AMD */ + + asm volatile("mwaitx" :: "a" (eax), "b" (ebx), "c" (ecx)); } -static inline void __sti_mwait(unsigned long eax, unsigned long ecx) +/* + * Re-enable interrupts right upon calling mwait in such a way that + * no interrupt can fire _before_ the execution of mwait, ie: no + * instruction must be placed between "sti" and "mwait". + * + * This is necessary because if an interrupt queues a timer before + * executing mwait, it would otherwise go unnoticed and the next tick + * would not be reprogrammed accordingly before mwait ever wakes up. + */ +static __always_inline void __sti_mwait(u32 eax, u32 ecx) { - trace_hardirqs_on(); - /* "mwait %eax, %ecx;" */ - asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" - :: "a" (eax), "c" (ecx)); + + asm volatile("sti; mwait" :: "a" (eax), "c" (ecx)); } /* @@ -97,20 +106,45 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx) * New with Core Duo processors, MWAIT can take some hints based on CPU * capability. 
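
The MWAIT_HINT2CSTATE()/MWAIT_HINT2SUBSTATE() macros near the top of this file split an MWAIT hint into its C-state and substate nibbles. A small illustrative decode; the hint value is hypothetical:

	static u32 mwait_hint_example(void)
	{
		u32 hint = 0x21;	/* encodes C-state 2, substate 1 */

		/* MWAIT_HINT2CSTATE(hint) == 2, MWAIT_HINT2SUBSTATE(hint) == 1 */
		return MWAIT_HINT2CSTATE(hint);
	}
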
*/ -static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) +static __always_inline void mwait_idle_with_hints(u32 eax, u32 ecx) { + if (need_resched()) + return; + + x86_idle_clear_cpu_buffers(); + if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) { - if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) { - mb(); - clflush((void *)&current_thread_info()->flags); - mb(); - } + const void *addr = &current_thread_info()->flags; + + alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); + __monitor(addr, 0, 0); - __monitor((void *)&current_thread_info()->flags, 0, 0); - if (!need_resched()) + if (need_resched()) + goto out; + + if (ecx & 1) { __mwait(eax, ecx); + } else { + __sti_mwait(eax, ecx); + raw_local_irq_disable(); + } } + +out: current_clr_polling(); } +/* + * Caller can specify whether to enter C0.1 (low latency, less + * power saving) or C0.2 state (saves more power, but longer wakeup + * latency). This may be overridden by the IA32_UMWAIT_CONTROL MSR + * which can force requests for C0.2 to be downgraded to C0.1. + */ +static inline void __tpause(u32 ecx, u32 edx, u32 eax) +{ + /* "tpause %ecx" */ + asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1" + :: "c" (ecx), "d" (edx), "a" (eax)); +} + #endif /* _ASM_X86_MWAIT_H */ diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 75ded1d13d98..79d88d12c8fb 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -9,21 +9,31 @@ #ifdef CONFIG_X86_LOCAL_APIC -extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int reserve_perfctr_nmi(unsigned int); extern void release_perfctr_nmi(unsigned int); extern int reserve_evntsel_nmi(unsigned int); extern void release_evntsel_nmi(unsigned int); -struct ctl_table; -extern int proc_nmi_enabled(struct ctl_table *, int , - void __user *, size_t *, loff_t *); -extern int unknown_nmi_panic; - #endif /* CONFIG_X86_LOCAL_APIC */ +extern int unknown_nmi_panic; +extern int panic_on_unrecovered_nmi; +extern int panic_on_io_nmi; + +/* NMI handler flags */ #define NMI_FLAG_FIRST 1 +/** + * enum - NMI types. + * @NMI_LOCAL: Local NMI, CPU-specific NMI generated by the Local APIC. + * @NMI_UNKNOWN: Unknown NMI, the source of the NMI may not be identified. + * @NMI_SERR: System Error NMI, typically triggered by PCI errors. + * @NMI_IO_CHECK: I/O Check NMI, related to I/O errors. + * @NMI_MAX: Maximum value for NMI types. + * + * NMI types are used to categorize NMIs and to dispatch them to the + * appropriate handler. + */ enum { NMI_LOCAL=0, NMI_UNKNOWN, @@ -32,6 +42,7 @@ enum { NMI_MAX }; +/* NMI handler return values */ #define NMI_DONE 0 #define NMI_HANDLED 1 @@ -41,14 +52,33 @@ struct nmiaction { struct list_head list; nmi_handler_t handler; u64 max_duration; - struct irq_work irq_work; unsigned long flags; const char *name; }; +/** + * register_nmi_handler - Register a handler for a specific NMI type + * @t: NMI type (e.g. NMI_LOCAL) + * @fn: The NMI handler + * @fg: Flags associated with the NMI handler + * @n: Name of the NMI handler + * @init: Optional __init* attributes for struct nmiaction + * + * Adds the provided handler to the list of handlers for the specified + * NMI type. Handlers flagged with NMI_FLAG_FIRST will be executed first. + * + * Sometimes the source of an NMI can't be reliably determined, which + * results in an NMI being tagged as "unknown". Register an additional + * handler using the NMI_UNKNOWN type to handle such cases. 
The + * caller would get one last chance to assume responsibility for the + * NMI. + * + * Return: 0 on success, or an error code on failure. + */ #define register_nmi_handler(t, fn, fg, n, init...) \ ({ \ static struct nmiaction init fn##_na = { \ + .list = LIST_HEAD_INIT(fn##_na.list), \ .handler = (fn), \ .name = (n), \ .flags = (fg), \ @@ -58,7 +88,18 @@ struct nmiaction { int __register_nmi_handler(unsigned int, struct nmiaction *); -void unregister_nmi_handler(unsigned int, const char *); +/** + * unregister_nmi_handler - Unregister a handler for a specific NMI type + * @type: NMI type (e.g. NMI_LOCAL) + * @name: Name of the NMI handler used during registration + * + * Removes the handler associated with the specified NMI type from the + * NMI handler list. The "name" is used as a lookup key to identify the + * handler. + */ +void unregister_nmi_handler(unsigned int type, const char *name); + +void set_emergency_nmi_handler(unsigned int type, nmi_handler_t handler); void stop_nmi(void); void restart_nmi(void); diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h index 12f12b5cf2ca..cd94221d8335 100644 --- a/arch/x86/include/asm/nops.h +++ b/arch/x86/include/asm/nops.h @@ -2,146 +2,88 @@ #ifndef _ASM_X86_NOPS_H #define _ASM_X86_NOPS_H +#include <asm/asm.h> + /* * Define nops for use with alternative() and for tracing. - * - * *_NOP5_ATOMIC must be a single instruction. */ -#define NOP_DS_PREFIX 0x3e +#ifndef CONFIG_64BIT -/* generic versions from gas - 1: nop - the following instructions are NOT nops in 64-bit mode, - for 64-bit mode use K8 or P6 nops instead - 2: movl %esi,%esi - 3: leal 0x00(%esi),%esi - 4: leal 0x00(,%esi,1),%esi - 6: leal 0x00000000(%esi),%esi - 7: leal 0x00000000(,%esi,1),%esi -*/ -#define GENERIC_NOP1 0x90 -#define GENERIC_NOP2 0x89,0xf6 -#define GENERIC_NOP3 0x8d,0x76,0x00 -#define GENERIC_NOP4 0x8d,0x74,0x26,0x00 -#define GENERIC_NOP5 GENERIC_NOP1,GENERIC_NOP4 -#define GENERIC_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00 -#define GENERIC_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00 -#define GENERIC_NOP8 GENERIC_NOP1,GENERIC_NOP7 -#define GENERIC_NOP5_ATOMIC NOP_DS_PREFIX,GENERIC_NOP4 +/* + * Generic 32bit nops from GAS: + * + * 1: nop + * 2: movl %esi,%esi + * 3: leal 0x0(%esi),%esi + * 4: leal 0x0(%esi,%eiz,1),%esi + * 5: leal %ds:0x0(%esi,%eiz,1),%esi + * 6: leal 0x0(%esi),%esi + * 7: leal 0x0(%esi,%eiz,1),%esi + * 8: leal %ds:0x0(%esi,%eiz,1),%esi + * + * Except 5 and 8, which are DS prefixed 4 and 7 resp, where GAS would emit 2 + * nop instructions. 
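
A hedged usage sketch for the NMI handler API documented in the nmi.h hunk above; the handler and its name are hypothetical:

	static int example_nmi_handler(unsigned int type, struct pt_regs *regs)
	{
		/* Return NMI_HANDLED to claim the NMI, NMI_DONE to pass it on. */
		return NMI_DONE;
	}

	static void example_nmi_setup(void)
	{
		register_nmi_handler(NMI_UNKNOWN, example_nmi_handler, 0, "example");
		/* ... and on teardown: */
		unregister_nmi_handler(NMI_UNKNOWN, "example");
	}
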
+ */ +#define BYTES_NOP1 0x90 +#define BYTES_NOP2 0x89,0xf6 +#define BYTES_NOP3 0x8d,0x76,0x00 +#define BYTES_NOP4 0x8d,0x74,0x26,0x00 +#define BYTES_NOP5 0x3e,BYTES_NOP4 +#define BYTES_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00 +#define BYTES_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00 +#define BYTES_NOP8 0x3e,BYTES_NOP7 -/* Opteron 64bit nops - 1: nop - 2: osp nop - 3: osp osp nop - 4: osp osp osp nop -*/ -#define K8_NOP1 GENERIC_NOP1 -#define K8_NOP2 0x66,K8_NOP1 -#define K8_NOP3 0x66,K8_NOP2 -#define K8_NOP4 0x66,K8_NOP3 -#define K8_NOP5 K8_NOP3,K8_NOP2 -#define K8_NOP6 K8_NOP3,K8_NOP3 -#define K8_NOP7 K8_NOP4,K8_NOP3 -#define K8_NOP8 K8_NOP4,K8_NOP4 -#define K8_NOP5_ATOMIC 0x66,K8_NOP4 +#define ASM_NOP_MAX 8 -/* K7 nops - uses eax dependencies (arbitrary choice) - 1: nop - 2: movl %eax,%eax - 3: leal (,%eax,1),%eax - 4: leal 0x00(,%eax,1),%eax - 6: leal 0x00000000(%eax),%eax - 7: leal 0x00000000(,%eax,1),%eax -*/ -#define K7_NOP1 GENERIC_NOP1 -#define K7_NOP2 0x8b,0xc0 -#define K7_NOP3 0x8d,0x04,0x20 -#define K7_NOP4 0x8d,0x44,0x20,0x00 -#define K7_NOP5 K7_NOP4,K7_NOP1 -#define K7_NOP6 0x8d,0x80,0,0,0,0 -#define K7_NOP7 0x8D,0x04,0x05,0,0,0,0 -#define K7_NOP8 K7_NOP7,K7_NOP1 -#define K7_NOP5_ATOMIC NOP_DS_PREFIX,K7_NOP4 +#else -/* P6 nops - uses eax dependencies (Intel-recommended choice) - 1: nop - 2: osp nop - 3: nopl (%eax) - 4: nopl 0x00(%eax) - 5: nopl 0x00(%eax,%eax,1) - 6: osp nopl 0x00(%eax,%eax,1) - 7: nopl 0x00000000(%eax) - 8: nopl 0x00000000(%eax,%eax,1) - Note: All the above are assumed to be a single instruction. - There is kernel code that depends on this. -*/ -#define P6_NOP1 GENERIC_NOP1 -#define P6_NOP2 0x66,0x90 -#define P6_NOP3 0x0f,0x1f,0x00 -#define P6_NOP4 0x0f,0x1f,0x40,0 -#define P6_NOP5 0x0f,0x1f,0x44,0x00,0 -#define P6_NOP6 0x66,0x0f,0x1f,0x44,0x00,0 -#define P6_NOP7 0x0f,0x1f,0x80,0,0,0,0 -#define P6_NOP8 0x0f,0x1f,0x84,0x00,0,0,0,0 -#define P6_NOP5_ATOMIC P6_NOP5 +/* + * Generic 64bit nops from GAS: + * + * 1: nop + * 2: osp nop + * 3: nopl (%eax) + * 4: nopl 0x00(%eax) + * 5: nopl 0x00(%eax,%eax,1) + * 6: osp nopl 0x00(%eax,%eax,1) + * 7: nopl 0x00000000(%eax) + * 8: nopl 0x00000000(%eax,%eax,1) + * 9: cs nopl 0x00000000(%eax,%eax,1) + * 10: osp cs nopl 0x00000000(%eax,%eax,1) + * 11: osp osp cs nopl 0x00000000(%eax,%eax,1) + */ +#define BYTES_NOP1 0x90 +#define BYTES_NOP2 0x66,BYTES_NOP1 +#define BYTES_NOP3 0x0f,0x1f,0x00 +#define BYTES_NOP4 0x0f,0x1f,0x40,0x00 +#define BYTES_NOP5 0x0f,0x1f,0x44,0x00,0x00 +#define BYTES_NOP6 0x66,BYTES_NOP5 +#define BYTES_NOP7 0x0f,0x1f,0x80,0x00,0x00,0x00,0x00 +#define BYTES_NOP8 0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 +#define BYTES_NOP9 0x2e,BYTES_NOP8 +#define BYTES_NOP10 0x66,BYTES_NOP9 +#define BYTES_NOP11 0x66,BYTES_NOP10 -#ifdef __ASSEMBLY__ -#define _ASM_MK_NOP(x) .byte x -#else -#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n" -#endif +#define ASM_NOP9 _ASM_BYTES(BYTES_NOP9) +#define ASM_NOP10 _ASM_BYTES(BYTES_NOP10) +#define ASM_NOP11 _ASM_BYTES(BYTES_NOP11) -#if defined(CONFIG_MK7) -#define ASM_NOP1 _ASM_MK_NOP(K7_NOP1) -#define ASM_NOP2 _ASM_MK_NOP(K7_NOP2) -#define ASM_NOP3 _ASM_MK_NOP(K7_NOP3) -#define ASM_NOP4 _ASM_MK_NOP(K7_NOP4) -#define ASM_NOP5 _ASM_MK_NOP(K7_NOP5) -#define ASM_NOP6 _ASM_MK_NOP(K7_NOP6) -#define ASM_NOP7 _ASM_MK_NOP(K7_NOP7) -#define ASM_NOP8 _ASM_MK_NOP(K7_NOP8) -#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K7_NOP5_ATOMIC) -#elif defined(CONFIG_X86_P6_NOP) -#define ASM_NOP1 _ASM_MK_NOP(P6_NOP1) -#define ASM_NOP2 _ASM_MK_NOP(P6_NOP2) -#define ASM_NOP3 _ASM_MK_NOP(P6_NOP3) -#define ASM_NOP4 
_ASM_MK_NOP(P6_NOP4) -#define ASM_NOP5 _ASM_MK_NOP(P6_NOP5) -#define ASM_NOP6 _ASM_MK_NOP(P6_NOP6) -#define ASM_NOP7 _ASM_MK_NOP(P6_NOP7) -#define ASM_NOP8 _ASM_MK_NOP(P6_NOP8) -#define ASM_NOP5_ATOMIC _ASM_MK_NOP(P6_NOP5_ATOMIC) -#elif defined(CONFIG_X86_64) -#define ASM_NOP1 _ASM_MK_NOP(K8_NOP1) -#define ASM_NOP2 _ASM_MK_NOP(K8_NOP2) -#define ASM_NOP3 _ASM_MK_NOP(K8_NOP3) -#define ASM_NOP4 _ASM_MK_NOP(K8_NOP4) -#define ASM_NOP5 _ASM_MK_NOP(K8_NOP5) -#define ASM_NOP6 _ASM_MK_NOP(K8_NOP6) -#define ASM_NOP7 _ASM_MK_NOP(K8_NOP7) -#define ASM_NOP8 _ASM_MK_NOP(K8_NOP8) -#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K8_NOP5_ATOMIC) -#else -#define ASM_NOP1 _ASM_MK_NOP(GENERIC_NOP1) -#define ASM_NOP2 _ASM_MK_NOP(GENERIC_NOP2) -#define ASM_NOP3 _ASM_MK_NOP(GENERIC_NOP3) -#define ASM_NOP4 _ASM_MK_NOP(GENERIC_NOP4) -#define ASM_NOP5 _ASM_MK_NOP(GENERIC_NOP5) -#define ASM_NOP6 _ASM_MK_NOP(GENERIC_NOP6) -#define ASM_NOP7 _ASM_MK_NOP(GENERIC_NOP7) -#define ASM_NOP8 _ASM_MK_NOP(GENERIC_NOP8) -#define ASM_NOP5_ATOMIC _ASM_MK_NOP(GENERIC_NOP5_ATOMIC) -#endif -#define ASM_NOP_MAX 8 -#define NOP_ATOMIC5 (ASM_NOP_MAX+1) /* Entry for the 5-byte atomic NOP */ +#endif /* CONFIG_64BIT */ + +#define ASM_NOP1 _ASM_BYTES(BYTES_NOP1) +#define ASM_NOP2 _ASM_BYTES(BYTES_NOP2) +#define ASM_NOP3 _ASM_BYTES(BYTES_NOP3) +#define ASM_NOP4 _ASM_BYTES(BYTES_NOP4) +#define ASM_NOP5 _ASM_BYTES(BYTES_NOP5) +#define ASM_NOP6 _ASM_BYTES(BYTES_NOP6) +#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7) +#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8) -#ifndef __ASSEMBLY__ -extern const unsigned char * const *ideal_nops; -extern void arch_init_ideal_nops(void); +#ifndef __ASSEMBLER__ +extern const unsigned char * const x86_nops[]; #endif #endif /* _ASM_X86_NOPS_H */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index dad12b767ba0..4f4b5e8a1574 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -4,11 +4,102 @@ #define _ASM_X86_NOSPEC_BRANCH_H_ #include <linux/static_key.h> +#include <linux/objtool.h> +#include <linux/linkage.h> #include <asm/alternative.h> -#include <asm/alternative-asm.h> #include <asm/cpufeatures.h> #include <asm/msr-index.h> +#include <asm/unwind_hints.h> +#include <asm/percpu.h> + +/* + * Call depth tracking for Intel SKL CPUs to address the RSB underflow + * issue in software. + * + * The tracking does not use a counter. It uses arithmetic shift + * right on call entry and logical shift left on return. + * + * The depth tracking variable is initialized to 0x8000.... when the call + * depth is zero. The arithmetic shift right sign extends the MSB and + * saturates after the 12th call. The shift count is 5 for both directions + * so the tracking covers 12 nested calls. + * + * Call + * 0: 0x8000000000000000 0x0000000000000000 + * 1: 0xfc00000000000000 0xf000000000000000 + * ... + * 11: 0xfffffffffffffff8 0xfffffffffffffc00 + * 12: 0xffffffffffffffff 0xffffffffffffffe0 + * + * After a return buffer fill the depth is credited 12 calls before the + * next stuffing has to take place. + * + * There is an inaccuracy for situations like this: + * + * 10 calls + * 5 returns + * 3 calls + * 4 returns + * 3 calls + * .... + * + * The shift count might cause this to be off by one in either direction, + * but there is still a cushion vs. the RSB depth. 
The algorithm does not + * claim to be perfect and it can be speculated around by the CPU, but it + * is considered that it obfuscates the problem enough to make exploitation + * extremely difficult. + */ +#define RET_DEPTH_SHIFT 5 +#define RSB_RET_STUFF_LOOPS 16 +#define RET_DEPTH_INIT 0x8000000000000000ULL +#define RET_DEPTH_INIT_FROM_CALL 0xfc00000000000000ULL +#define RET_DEPTH_CREDIT 0xffffffffffffffffULL + +#ifdef CONFIG_CALL_THUNKS_DEBUG +# define CALL_THUNKS_DEBUG_INC_CALLS \ + incq PER_CPU_VAR(__x86_call_count); +# define CALL_THUNKS_DEBUG_INC_RETS \ + incq PER_CPU_VAR(__x86_ret_count); +# define CALL_THUNKS_DEBUG_INC_STUFFS \ + incq PER_CPU_VAR(__x86_stuffs_count); +# define CALL_THUNKS_DEBUG_INC_CTXSW \ + incq PER_CPU_VAR(__x86_ctxsw_count); +#else +# define CALL_THUNKS_DEBUG_INC_CALLS +# define CALL_THUNKS_DEBUG_INC_RETS +# define CALL_THUNKS_DEBUG_INC_STUFFS +# define CALL_THUNKS_DEBUG_INC_CTXSW +#endif + +#if defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS) + +#include <asm/asm-offsets.h> + +#define CREDIT_CALL_DEPTH \ + movq $-1, PER_CPU_VAR(__x86_call_depth); + +#define RESET_CALL_DEPTH \ + xor %eax, %eax; \ + bts $63, %rax; \ + movq %rax, PER_CPU_VAR(__x86_call_depth); + +#define RESET_CALL_DEPTH_FROM_CALL \ + movb $0xfc, %al; \ + shl $56, %rax; \ + movq %rax, PER_CPU_VAR(__x86_call_depth); \ + CALL_THUNKS_DEBUG_INC_CALLS + +#define INCREMENT_CALL_DEPTH \ + sarq $5, PER_CPU_VAR(__x86_call_depth); \ + CALL_THUNKS_DEBUG_INC_CALLS + +#else +#define CREDIT_CALL_DEPTH +#define RESET_CALL_DEPTH +#define RESET_CALL_DEPTH_FROM_CALL +#define INCREMENT_CALL_DEPTH +#endif /* * Fill the CPU return stack buffer. @@ -27,112 +118,121 @@ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. */ +#define RETPOLINE_THUNK_SIZE 32 #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ -#define RSB_FILL_LOOPS 16 /* To avoid underflow */ /* + * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN. + */ +#define __FILL_RETURN_SLOT \ + ANNOTATE_INTRA_FUNCTION_CALL; \ + call 772f; \ + int3; \ +772: + +/* + * Stuff the entire RSB. + * * Google experimented with loop-unrolling and this turned out to be - * the optimal version — two calls, each with their own speculation + * the optimal version - two calls, each with their own speculation * trap should their return address end up getting used, in a loop. */ -#define __FILL_RETURN_BUFFER(reg, nr, sp) \ - mov $(nr/2), reg; \ -771: \ - call 772f; \ -773: /* speculation trap */ \ - pause; \ - lfence; \ - jmp 773b; \ -772: \ - call 774f; \ -775: /* speculation trap */ \ - pause; \ - lfence; \ - jmp 775b; \ -774: \ - dec reg; \ - jnz 771b; \ - add $(BITS_PER_LONG/8) * nr, sp; - -#ifdef __ASSEMBLY__ +#ifdef CONFIG_X86_64 +#define __FILL_RETURN_BUFFER(reg, nr) \ + mov $(nr/2), reg; \ +771: \ + __FILL_RETURN_SLOT \ + __FILL_RETURN_SLOT \ + add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \ + dec reg; \ + jnz 771b; \ + /* barrier for jnz misprediction */ \ + lfence; \ + CREDIT_CALL_DEPTH \ + CALL_THUNKS_DEBUG_INC_CTXSW +#else +/* + * i386 doesn't unconditionally have LFENCE, as such it can't + * do a loop. + */ +#define __FILL_RETURN_BUFFER(reg, nr) \ + .rept nr; \ + __FILL_RETURN_SLOT; \ + .endr; \ + add $(BITS_PER_LONG/8) * nr, %_ASM_SP; +#endif /* - * This should be used immediately before a retpoline alternative. It tells - * objtool where the retpolines are so that it can make sense of the control - * flow by just reading the original instruction(s) and ignoring the - * alternatives. 
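
An illustrative plain-C model of the saturating shift arithmetic described in the comment above. This is a sketch for intuition only; the real accounting is done by the INCREMENT_CALL_DEPTH/CREDIT_CALL_DEPTH asm that follows:

	static u64 depth_after_call(u64 depth)
	{
		/* sarq $5: sign-extends the MSB, saturating at call depth 12 */
		return (u64)((s64)depth >> RET_DEPTH_SHIFT);
	}

	static u64 depth_after_return(u64 depth)
	{
		/* shlq $5: credits one call back */
		return depth << RET_DEPTH_SHIFT;
	}
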
+ * Stuff a single RSB slot. + * + * To mitigate Post-Barrier RSB speculation, one CALL instruction must be + * forced to retire before letting a RET instruction execute. + * + * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed + * before this point. */ -.macro ANNOTATE_NOSPEC_ALTERNATIVE - .Lannotate_\@: - .pushsection .discard.nospec - .long .Lannotate_\@ - . - .popsection -.endm +#define __FILL_ONE_RETURN \ + __FILL_RETURN_SLOT \ + add $(BITS_PER_LONG/8), %_ASM_SP; \ + lfence; + +#ifdef __ASSEMBLER__ /* - * This should be used immediately before an indirect jump/call. It tells - * objtool the subsequent indirect jump/call is vouched safe for retpoline - * builds. + * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions + * vs RETBleed validation. */ -.macro ANNOTATE_RETPOLINE_SAFE - .Lannotate_\@: - .pushsection .discard.retpoline_safe - _ASM_PTR .Lannotate_\@ - .popsection -.endm +#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE /* - * These are the bare retpoline primitives for indirect jmp and call. - * Do not use these directly; they only exist to make the ALTERNATIVE - * invocation below less ugly. + * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should + * eventually turn into its own annotation. */ -.macro RETPOLINE_JMP reg:req - call .Ldo_rop_\@ -.Lspec_trap_\@: - pause - lfence - jmp .Lspec_trap_\@ -.Ldo_rop_\@: - mov \reg, (%_ASM_SP) - ret +.macro VALIDATE_UNRET_END +#if defined(CONFIG_NOINSTR_VALIDATION) && \ + (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) + ANNOTATE_RETPOLINE_SAFE + nop +#endif .endm /* - * This is a wrapper around RETPOLINE_JMP so the called function in reg - * returns to the instruction after the macro. + * Emits a conditional CS prefix that is compatible with + * -mindirect-branch-cs-prefix. */ -.macro RETPOLINE_CALL reg:req - jmp .Ldo_call_\@ -.Ldo_retpoline_jmp_\@: - RETPOLINE_JMP \reg -.Ldo_call_\@: - call .Ldo_retpoline_jmp_\@ +.macro __CS_PREFIX reg:req + .irp rs,r8,r9,r10,r11,r12,r13,r14,r15 + .ifc \reg,\rs + .byte 0x2e + .endif + .endr .endm /* * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple * indirect jmp/call which may be susceptible to the Spectre variant 2 * attack. + * + * NOTE: these do not take kCFI into account and are thus not comparable to C + * indirect calls, take care when using. The target of these should be an ENDBR + * instruction irrespective of kCFI. */ .macro JMP_NOSPEC reg:req -#ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \ - __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD +#ifdef CONFIG_MITIGATION_RETPOLINE + __CS_PREFIX \reg + jmp __x86_indirect_thunk_\reg #else - jmp *\reg + jmp *%\reg + int3 #endif .endm .macro CALL_NOSPEC reg:req -#ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \ - __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD +#ifdef CONFIG_MITIGATION_RETPOLINE + __CS_PREFIX \reg + call __x86_indirect_thunk_\reg #else - call *\reg + call *%\reg #endif .endm @@ -140,48 +240,222 @@ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP * monstrosity above, manually. 
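
For C code, an indirect call can be routed through the same thunks. A hedged sketch that assumes the C-side CALL_NOSPEC string and [thunk_target] operand from the non-__ASSEMBLER__ half of this header; the callback pointer is hypothetical:

	static void indirect_call_example(void (*callback)(void))
	{
		asm volatile(CALL_NOSPEC
			     : /* no outputs */
			     : [thunk_target] "r" (callback)
			     : "memory");
	}
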
*/ -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req -#ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE "jmp .Lskip_rsb_\@", \ - __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ - \ftr +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS) + ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \ + __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \ + __stringify(nop;nop;__FILL_ONE_RETURN), \ftr2 + .Lskip_rsb_\@: +.endm + +/* + * The CALL to srso_alias_untrain_ret() must be patched in directly at + * the spot where untraining must be done, ie., srso_alias_untrain_ret() + * must be the target of a CALL instruction instead of indirectly + * jumping to a wrapper which then calls it. Therefore, this macro is + * called outside of __UNTRAIN_RET below, for the time being, before the + * kernel can support nested alternatives with arbitrary nesting. + */ +.macro CALL_UNTRAIN_RET +#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) + ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \ + "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS +#endif +.endm + +/* + * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the + * return thunk isn't mapped into the userspace tables (then again, AMD + * typically has NO_MELTDOWN). + * + * While retbleed_untrain_ret() doesn't clobber anything but requires stack, + * write_ibpb() will clobber AX, CX, DX. + * + * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point + * where we have a stack but before any RET instruction. + */ +.macro __UNTRAIN_RET ibpb_feature, call_depth_insns +#if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY) + VALIDATE_UNRET_END + CALL_UNTRAIN_RET + ALTERNATIVE_2 "", \ + "call write_ibpb", \ibpb_feature, \ + __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH +#endif +.endm + +#define UNTRAIN_RET \ + __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH) + +#define UNTRAIN_RET_VM \ + __UNTRAIN_RET X86_FEATURE_IBPB_ON_VMEXIT, __stringify(RESET_CALL_DEPTH) + +#define UNTRAIN_RET_FROM_CALL \ + __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH_FROM_CALL) + + +.macro CALL_DEPTH_ACCOUNT +#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING + ALTERNATIVE "", \ + __stringify(INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH +#endif +.endm + +/* + * Macro to execute VERW insns that mitigate transient data sampling + * attacks such as MDS or TSA. On affected systems a microcode update + * overloaded VERW insns to also clear the CPU buffers. VERW clobbers + * CFLAGS.ZF. + * Note: Only the memory operand variant of VERW clears the CPU buffers. + */ +#ifdef CONFIG_X86_64 +#define VERW verw x86_verw_sel(%rip) +#else +/* + * In 32bit mode, the memory operand must be a %cs reference. The data segments + * may not be usable (vm86 mode), and the stack segment may not be flat (ESPFIX32). + */ +#define VERW verw %cs:x86_verw_sel #endif + +/* + * Provide a stringified VERW macro for simple usage, and a non-stringified + * VERW macro for use in more elaborate sequences, e.g. to encode a conditional + * VERW within an ALTERNATIVE. + */ +#define __CLEAR_CPU_BUFFERS __stringify(VERW) + +/* If necessary, emit VERW on exit-to-userspace to clear CPU buffers. 
*/ +#define CLEAR_CPU_BUFFERS \ + ALTERNATIVE "", __CLEAR_CPU_BUFFERS, X86_FEATURE_CLEAR_CPU_BUF + +#ifdef CONFIG_X86_64 +.macro CLEAR_BRANCH_HISTORY + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP +.endm + +.macro CLEAR_BRANCH_HISTORY_VMEXIT + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_VMEXIT .endm +#else +#define CLEAR_BRANCH_HISTORY +#define CLEAR_BRANCH_HISTORY_VMEXIT +#endif + +#else /* __ASSEMBLER__ */ + +#define ITS_THUNK_SIZE 64 + +typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; +typedef u8 its_thunk_t[ITS_THUNK_SIZE]; +extern retpoline_thunk_t __x86_indirect_thunk_array[]; +extern retpoline_thunk_t __x86_indirect_call_thunk_array[]; +extern retpoline_thunk_t __x86_indirect_jump_thunk_array[]; +extern its_thunk_t __x86_indirect_its_thunk_array[]; + +#ifdef CONFIG_MITIGATION_RETHUNK +extern void __x86_return_thunk(void); +#else +static inline void __x86_return_thunk(void) {} +#endif + +#ifdef CONFIG_MITIGATION_UNRET_ENTRY +extern void retbleed_return_thunk(void); +#else +static inline void retbleed_return_thunk(void) {} +#endif + +extern void srso_alias_untrain_ret(void); + +#ifdef CONFIG_MITIGATION_SRSO +extern void srso_return_thunk(void); +extern void srso_alias_return_thunk(void); +#else +static inline void srso_return_thunk(void) {} +static inline void srso_alias_return_thunk(void) {} +#endif + +#ifdef CONFIG_MITIGATION_ITS +extern void its_return_thunk(void); +#else +static inline void its_return_thunk(void) {} +#endif + +extern void retbleed_return_thunk(void); +extern void srso_return_thunk(void); +extern void srso_alias_return_thunk(void); + +extern void entry_untrain_ret(void); +extern void write_ibpb(void); + +#ifdef CONFIG_X86_64 +extern void clear_bhb_loop(void); +#endif + +extern void (*x86_return_thunk)(void); + +extern void __warn_thunk(void); + +#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING +extern void call_depth_return_thunk(void); + +#define CALL_DEPTH_ACCOUNT \ + ALTERNATIVE("", \ + __stringify(INCREMENT_CALL_DEPTH), \ + X86_FEATURE_CALL_DEPTH) + +DECLARE_PER_CPU_CACHE_HOT(u64, __x86_call_depth); + +#ifdef CONFIG_CALL_THUNKS_DEBUG +DECLARE_PER_CPU(u64, __x86_call_count); +DECLARE_PER_CPU(u64, __x86_ret_count); +DECLARE_PER_CPU(u64, __x86_stuffs_count); +DECLARE_PER_CPU(u64, __x86_ctxsw_count); +#endif +#else /* !CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ + +static inline void call_depth_return_thunk(void) {} +#define CALL_DEPTH_ACCOUNT "" + +#endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ + +#ifdef CONFIG_MITIGATION_RETPOLINE -#else /* __ASSEMBLY__ */ +#define GEN(reg) \ + extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; +#include <asm/GEN-for-each-reg.h> +#undef GEN -#define ANNOTATE_NOSPEC_ALTERNATIVE \ - "999:\n\t" \ - ".pushsection .discard.nospec\n\t" \ - ".long 999b - .\n\t" \ - ".popsection\n\t" +#define GEN(reg) \ + extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg; +#include <asm/GEN-for-each-reg.h> +#undef GEN -#define ANNOTATE_RETPOLINE_SAFE \ - "999:\n\t" \ - ".pushsection .discard.retpoline_safe\n\t" \ - _ASM_PTR " 999b\n\t" \ - ".popsection\n\t" +#define GEN(reg) \ + extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg; +#include <asm/GEN-for-each-reg.h> +#undef GEN -#ifdef CONFIG_RETPOLINE #ifdef CONFIG_X86_64 /* + * Emits a conditional CS prefix that is compatible with + * -mindirect-branch-cs-prefix. 
+ */ +#define __CS_PREFIX(reg) \ + ".irp rs,r8,r9,r10,r11,r12,r13,r14,r15\n" \ + ".ifc \\rs," reg "\n" \ + ".byte 0x2e\n" \ + ".endif\n" \ + ".endr\n" + +/* * Inline asm uses the %V modifier which is only in newer GCC - * which is ensured when CONFIG_RETPOLINE is defined. + * which is ensured when CONFIG_MITIGATION_RETPOLINE is defined. */ -# define CALL_NOSPEC \ - ANNOTATE_NOSPEC_ALTERNATIVE \ - ALTERNATIVE_2( \ - ANNOTATE_RETPOLINE_SAFE \ - "call *%[thunk_target]\n", \ - "call __x86_indirect_thunk_%V[thunk_target]\n", \ - X86_FEATURE_RETPOLINE, \ - "lfence;\n" \ - ANNOTATE_RETPOLINE_SAFE \ - "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_AMD) +#define CALL_NOSPEC __CS_PREFIX("%V[thunk_target]") \ + "call __x86_indirect_thunk_%V[thunk_target]\n" + # define THUNK_TARGET(addr) [thunk_target] "r" (addr) #else /* CONFIG_X86_32 */ @@ -191,9 +465,8 @@ * here, anyway. */ # define CALL_NOSPEC \ - ANNOTATE_NOSPEC_ALTERNATIVE \ ALTERNATIVE_2( \ - ANNOTATE_RETPOLINE_SAFE \ + ANNOTATE_RETPOLINE_SAFE "\n" \ "call *%[thunk_target]\n", \ " jmp 904f;\n" \ " .align 16\n" \ @@ -202,16 +475,16 @@ " lfence;\n" \ " jmp 902b;\n" \ " .align 16\n" \ - "903: addl $4, %%esp;\n" \ + "903: lea 4(%%esp), %%esp;\n" \ " pushl %[thunk_target];\n" \ " ret;\n" \ " .align 16\n" \ "904: call 901b;\n", \ X86_FEATURE_RETPOLINE, \ "lfence;\n" \ - ANNOTATE_RETPOLINE_SAFE \ + ANNOTATE_RETPOLINE_SAFE "\n" \ "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_AMD) + X86_FEATURE_RETPOLINE_LFENCE) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif @@ -223,9 +496,12 @@ /* The Spectre V2 mitigation variants */ enum spectre_v2_mitigation { SPECTRE_V2_NONE, - SPECTRE_V2_RETPOLINE_GENERIC, - SPECTRE_V2_RETPOLINE_AMD, - SPECTRE_V2_IBRS_ENHANCED, + SPECTRE_V2_RETPOLINE, + SPECTRE_V2_LFENCE, + SPECTRE_V2_EIBRS, + SPECTRE_V2_EIBRS_RETPOLINE, + SPECTRE_V2_EIBRS_LFENCE, + SPECTRE_V2_IBRS, }; /* The indirect branch speculation control variants */ @@ -240,35 +516,12 @@ enum spectre_v2_user_mitigation { /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, + SPEC_STORE_BYPASS_AUTO, SPEC_STORE_BYPASS_DISABLE, SPEC_STORE_BYPASS_PRCTL, SPEC_STORE_BYPASS_SECCOMP, }; -extern char __indirect_thunk_start[]; -extern char __indirect_thunk_end[]; - -/* - * On VMEXIT we must ensure that no RSB predictions learned in the guest - * can be followed in the host, by overwriting the RSB completely. Both - * retpoline and IBRS mitigations for Spectre v2 need this; only on future - * CPUs with IBRS_ALL *might* it be avoided. 
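
A typical call site for the C-level CALL_NOSPEC above passes the target via THUNK_TARGET. A minimal sketch modeled on existing users follows; the function name and single-argument convention are hypothetical, and a real site must also account for any caller-saved registers the callee clobbers:

#include <asm/nospec-branch.h>

static inline long invoke_nospec(long (*fn)(long), long arg)
{
	long ret;

	/* expands to 'call __x86_indirect_thunk_<reg>' with retpolines on */
	asm volatile(CALL_NOSPEC
		     : "=a" (ret), ASM_CALL_CONSTRAINT
		     : THUNK_TARGET(fn), "D" (arg)
		     : "memory");
	return ret;
}
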
- */ -static inline void vmexit_fill_RSB(void) -{ -#ifdef CONFIG_RETPOLINE - unsigned long loops; - - asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE("jmp 910f", - __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), - X86_FEATURE_RETPOLINE) - "910:" - : "=r" (loops), ASM_CALL_CONSTRAINT - : : "memory" ); -#endif -} - static __always_inline void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) { @@ -280,15 +533,20 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) : "memory"); } +DECLARE_PER_CPU(bool, x86_ibpb_exit_to_user); + static inline void indirect_branch_prediction_barrier(void) { - u64 val = PRED_CMD_IBPB; - - alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB); + asm_inline volatile(ALTERNATIVE("", "call write_ibpb", X86_FEATURE_IBPB) + : ASM_CALL_CONSTRAINT + :: "rax", "rcx", "rdx", "memory"); } /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; +DECLARE_PER_CPU(u64, x86_spec_ctrl_current); +extern void update_spec_ctrl_cond(u64 val); +extern u64 spec_ctrl_current(void); /* * With retpoline, we must use IBRS to restrict branch prediction @@ -298,18 +556,18 @@ extern u64 x86_spec_ctrl_base; */ #define firmware_restrict_branch_speculation_start() \ do { \ - u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ - \ preempt_disable(); \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current() | SPEC_CTRL_IBRS, \ X86_FEATURE_USE_IBRS_FW); \ + alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \ + X86_FEATURE_USE_IBPB_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ do { \ - u64 val = x86_spec_ctrl_base; \ - \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current(), \ X86_FEATURE_USE_IBRS_FW); \ preempt_enable(); \ } while (0) @@ -318,65 +576,51 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -#endif /* __ASSEMBLY__ */ +DECLARE_STATIC_KEY_FALSE(switch_vcpu_ibpb); -/* - * Below is used in the eBPF JIT compiler and emits the byte sequence - * for the following assembly: - * - * With retpolines configured: - * - * callq do_rop - * spec_trap: - * pause - * lfence - * jmp spec_trap - * do_rop: - * mov %rax,(%rsp) for x86_64 - * mov %edx,(%esp) for x86_32 - * retq +DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear); + +DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); + +extern u16 x86_verw_sel; + +#include <asm/segment.h> + +/** + * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns * - * Without retpolines configured: + * This uses the otherwise unused and obsolete VERW instruction in + * combination with microcode which triggers a CPU buffer flush when the + * instruction is executed. + */ +static __always_inline void x86_clear_cpu_buffers(void) +{ + static const u16 ds = __KERNEL_DS; + + /* + * Has to be the memory-operand variant because only that + * guarantees the CPU buffer flush functionality according to + * documentation. The register-operand variant does not. + * Works with any segment selector, but a valid writable + * data segment is the fastest variant. + * + * "cc" clobber is required because VERW modifies ZF. + */ + asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); +} + +/** + * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS + * and TSA vulnerabilities. 
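
The start/end pair above is meant to bracket calls into firmware, with preemption held off across the whole region (note the preempt_disable()/preempt_enable() placement in the macros). A sketch of the expected shape; efi_call_example and fw_fn are hypothetical:

#include <asm/nospec-branch.h>

static unsigned long efi_call_example(unsigned long (*fw_fn)(void))
{
	unsigned long ret;

	firmware_restrict_branch_speculation_start(); /* IBRS on + IBPB, preempt off */
	ret = fw_fn();		/* firmware code runs with restricted prediction */
	firmware_restrict_branch_speculation_end();   /* restore, preempt on */

	return ret;
}
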
* - * jmp *%rax for x86_64 - * jmp *%edx for x86_32 + * Clear CPU buffers if the corresponding static key is enabled */ -#ifdef CONFIG_RETPOLINE -# ifdef CONFIG_X86_64 -# define RETPOLINE_RAX_BPF_JIT_SIZE 17 -# define RETPOLINE_RAX_BPF_JIT() \ -do { \ - EMIT1_off32(0xE8, 7); /* callq do_rop */ \ - /* spec_trap: */ \ - EMIT2(0xF3, 0x90); /* pause */ \ - EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ - EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ - /* do_rop: */ \ - EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ - EMIT1(0xC3); /* retq */ \ -} while (0) -# else /* !CONFIG_X86_64 */ -# define RETPOLINE_EDX_BPF_JIT() \ -do { \ - EMIT1_off32(0xE8, 7); /* call do_rop */ \ - /* spec_trap: */ \ - EMIT2(0xF3, 0x90); /* pause */ \ - EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ - EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ - /* do_rop: */ \ - EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \ - EMIT1(0xC3); /* ret */ \ -} while (0) -# endif -#else /* !CONFIG_RETPOLINE */ -# ifdef CONFIG_X86_64 -# define RETPOLINE_RAX_BPF_JIT_SIZE 2 -# define RETPOLINE_RAX_BPF_JIT() \ - EMIT2(0xFF, 0xE0); /* jmp *%rax */ -# else /* !CONFIG_X86_64 */ -# define RETPOLINE_EDX_BPF_JIT() \ - EMIT2(0xFF, 0xE2) /* jmp *%edx */ -# endif -#endif +static __always_inline void x86_idle_clear_cpu_buffers(void) +{ + if (static_branch_likely(&cpu_buf_idle_clear)) + x86_clear_cpu_buffers(); +} + +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index bbfde3d2662f..53ba39ce010c 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -3,21 +3,13 @@ #define _ASM_X86_NUMA_H #include <linux/nodemask.h> +#include <linux/errno.h> #include <asm/topology.h> #include <asm/apicdef.h> #ifdef CONFIG_NUMA -#define NR_NODE_MEMBLKS (MAX_NUMNODES*2) - -/* - * Too small node sizes may confuse the VM badly. Usually they - * result from BIOS bugs. 
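
x86_idle_clear_cpu_buffers() above is the static-branch-gated flavor for the idle path; an idle routine would use it along these lines (a sketch, with the MWAIT details elided and idle_enter_sketch hypothetical):

#include <asm/nospec-branch.h>

static __always_inline void idle_enter_sketch(unsigned long eax, unsigned long ecx)
{
	/*
	 * Compiles down to a patched-out branch unless cpu_buf_idle_clear
	 * was enabled during mitigation selection, so unaffected CPUs pay
	 * nothing here.
	 */
	x86_idle_clear_cpu_buffers();

	/* ... then enter the idle state, e.g. via MWAIT ... */
}
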
So dont recognize nodes as standalone - * NUMA entities that have less than this amount of RAM listed: - */ -#define NODE_MIN_SIZE (4*1024*1024) - extern int numa_off; /* @@ -31,9 +23,6 @@ extern int numa_off; extern s16 __apicid_to_node[MAX_LOCAL_APIC]; extern nodemask_t numa_nodes_parsed __initdata; -extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); -extern void __init numa_set_distance(int from, int to, int distance); - static inline void set_apicid_to_node(int apicid, s16 node) { __apicid_to_node[apicid] = node; @@ -52,32 +41,24 @@ static inline int numa_cpu_node(int cpu) } #endif /* CONFIG_NUMA */ -#ifdef CONFIG_X86_32 -# include <asm/numa_32.h> -#endif - #ifdef CONFIG_NUMA extern void numa_set_node(int cpu, int node); extern void numa_clear_node(int cpu); extern void __init init_cpu_to_node(void); -extern void numa_add_cpu(int cpu); -extern void numa_remove_cpu(int cpu); +extern void numa_add_cpu(unsigned int cpu); +extern void numa_remove_cpu(unsigned int cpu); +extern void init_gi_nodes(void); #else /* CONFIG_NUMA */ static inline void numa_set_node(int cpu, int node) { } static inline void numa_clear_node(int cpu) { } static inline void init_cpu_to_node(void) { } -static inline void numa_add_cpu(int cpu) { } -static inline void numa_remove_cpu(int cpu) { } +static inline void numa_add_cpu(unsigned int cpu) { } +static inline void numa_remove_cpu(unsigned int cpu) { } +static inline void init_gi_nodes(void) { } #endif /* CONFIG_NUMA */ #ifdef CONFIG_DEBUG_PER_CPU_MAPS -void debug_cpumask_set_cpu(int cpu, int node, bool enable); +void debug_cpumask_set_cpu(unsigned int cpu, int node, bool enable); #endif -#ifdef CONFIG_NUMA_EMU -#define FAKE_NODE_MIN_SIZE ((u64)32 << 20) -#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) -void numa_emu_cmdline(char *); -#endif /* CONFIG_NUMA_EMU */ - #endif /* _ASM_X86_NUMA_H */ diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h deleted file mode 100644 index 9c8e9e85be77..000000000000 --- a/arch/x86/include/asm/numa_32.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_NUMA_32_H -#define _ASM_X86_NUMA_32_H - -#ifdef CONFIG_HIGHMEM -extern void set_highmem_pages_init(void); -#else -static inline void set_highmem_pages_init(void) -{ -} -#endif - -#endif /* _ASM_X86_NUMA_32_H */ diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h index c2bf1de5d901..6fe76282aceb 100644 --- a/arch/x86/include/asm/olpc.h +++ b/arch/x86/include/asm/olpc.h @@ -9,12 +9,10 @@ struct olpc_platform_t { int flags; uint32_t boardrev; - int ecver; }; #define OLPC_F_PRESENT 0x01 #define OLPC_F_DCON 0x02 -#define OLPC_F_EC_WIDE_SCI 0x04 #ifdef CONFIG_OLPC @@ -64,13 +62,6 @@ static inline int olpc_board_at_least(uint32_t rev) return olpc_platform_info.boardrev >= rev; } -extern void olpc_ec_wakeup_set(u16 value); -extern void olpc_ec_wakeup_clear(u16 value); -extern bool olpc_ec_wakeup_available(void); - -extern int olpc_ec_mask_write(u16 bits); -extern int olpc_ec_sci_query(u16 *sci_value); - #else static inline int machine_is_olpc(void) @@ -83,14 +74,6 @@ static inline int olpc_has_dcon(void) return 0; } -static inline void olpc_ec_wakeup_set(u16 value) { } -static inline void olpc_ec_wakeup_clear(u16 value) { } - -static inline bool olpc_ec_wakeup_available(void) -{ - return false; -} - #endif #ifdef CONFIG_OLPC_XO1_PM @@ -101,20 +84,6 @@ extern void olpc_xo1_pm_wakeup_clear(u16 value); extern int pci_olpc_init(void); -/* SCI source values */ - -#define 
EC_SCI_SRC_EMPTY 0x00 -#define EC_SCI_SRC_GAME 0x01 -#define EC_SCI_SRC_BATTERY 0x02 -#define EC_SCI_SRC_BATSOC 0x04 -#define EC_SCI_SRC_BATERR 0x08 -#define EC_SCI_SRC_EBOOK 0x10 /* XO-1 only */ -#define EC_SCI_SRC_WLAN 0x20 /* XO-1 only */ -#define EC_SCI_SRC_ACPWR 0x40 -#define EC_SCI_SRC_BATCRIT 0x80 -#define EC_SCI_SRC_GPWAKE 0x100 /* XO-1.5 only */ -#define EC_SCI_SRC_ALL 0x1FF - /* GPIO assignments */ #define OLPC_GPIO_MIC_AC 1 diff --git a/arch/x86/include/asm/orc_header.h b/arch/x86/include/asm/orc_header.h new file mode 100644 index 000000000000..07bacf3e160e --- /dev/null +++ b/arch/x86/include/asm/orc_header.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (c) Meta Platforms, Inc. and affiliates. */ + +#ifndef _ORC_HEADER_H +#define _ORC_HEADER_H + +#include <linux/types.h> +#include <linux/compiler.h> +#include <asm/orc_hash.h> + +/* + * The header is currently a 20-byte hash of the ORC entry definition; see + * scripts/orc_hash.sh. + */ +#define ORC_HEADER \ + __used __section(".orc_header") __aligned(4) \ + static const u8 orc_header[] = { ORC_HASH } + +#endif /* _ORC_HEADER_H */ diff --git a/arch/x86/include/asm/orc_lookup.h b/arch/x86/include/asm/orc_lookup.h index 91c8d868424d..241631282e43 100644 --- a/arch/x86/include/asm/orc_lookup.h +++ b/arch/x86/include/asm/orc_lookup.h @@ -1,18 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _ORC_LOOKUP_H #define _ORC_LOOKUP_H diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h index 46f516dd80ce..e0125afa53fb 100644 --- a/arch/x86/include/asm/orc_types.h +++ b/arch/x86/include/asm/orc_types.h @@ -1,18 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _ORC_TYPES_H @@ -51,29 +39,15 @@ #define ORC_REG_SP_INDIRECT 9 #define ORC_REG_MAX 15 -/* - * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the - * caller's SP right before it made the call). Used for all callable - * functions, i.e. all C code and all callable asm functions. 
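
ORC_HEADER above is intended to be dropped at file scope in the unwinder code, so the kernel carries the same format hash that objtool embedded and any mismatch is caught. Usage is just:

#include <asm/orc_header.h>

ORC_HEADER;	/* places the 20-byte hash array into the .orc_header section */
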
- * - * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points - * to a fully populated pt_regs from a syscall, interrupt, or exception. - * - * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset - * points to the iret return frame. - * - * The UNWIND_HINT macros are used only for the unwind_hint struct. They - * aren't used in struct orc_entry due to size and complexity constraints. - * Objtool converts them to real types when it converts the hints to orc - * entries. - */ -#define ORC_TYPE_CALL 0 -#define ORC_TYPE_REGS 1 -#define ORC_TYPE_REGS_IRET 2 -#define UNWIND_HINT_TYPE_SAVE 3 -#define UNWIND_HINT_TYPE_RESTORE 4 +#define ORC_TYPE_UNDEFINED 0 +#define ORC_TYPE_END_OF_STACK 1 +#define ORC_TYPE_CALL 2 +#define ORC_TYPE_REGS 3 +#define ORC_TYPE_REGS_PARTIAL 4 + +#ifndef __ASSEMBLER__ +#include <asm/byteorder.h> -#ifndef __ASSEMBLY__ /* * This struct is more or less a vastly simplified version of the DWARF Call * Frame Information standard. It contains only the necessary parts of DWARF @@ -85,25 +59,20 @@ struct orc_entry { s16 sp_offset; s16 bp_offset; +#if defined(__LITTLE_ENDIAN_BITFIELD) unsigned sp_reg:4; unsigned bp_reg:4; - unsigned type:2; - unsigned end:1; + unsigned type:3; + unsigned signal:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + unsigned bp_reg:4; + unsigned sp_reg:4; + unsigned unused:4; + unsigned signal:1; + unsigned type:3; +#endif } __packed; -/* - * This struct is used by asm and inline asm code to manually annotate the - * location of registers on the stack for the ORC unwinder. - * - * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*. - */ -struct unwind_hint { - u32 ip; - s16 sp_offset; - u8 sp_reg; - u8 type; - u8 end; -}; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ORC_TYPES_H */ diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 7555b48803a8..9265f2fca99a 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -14,7 +14,7 @@ #include <asm/page_32.h> #endif /* CONFIG_X86_64 */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct page; @@ -34,9 +34,8 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, copy_page(to, from); } -#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ - alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) -#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE +#define vma_alloc_zeroed_movable_folio(vma, vaddr) \ + vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr) #ifndef __pa #define __pa(x) __phys_addr((unsigned long)(x)) @@ -67,11 +66,25 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, * virt_addr_valid(kaddr) returns true. 
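
The mirrored field order in the two bitfield variants above keeps the packed byte layout identical on either endianness (objtool writes the entries on the build host). If I am counting the widths right, both variants fit in two bytes after the two s16 offsets, which a build-time assertion could pin down:

#include <linux/build_bug.h>
#include <asm/orc_types.h>

/* 2 x s16 offsets + 12 (LE) or 16 (BE) bits of flags, __packed */
static_assert(sizeof(struct orc_entry) == 6, "unexpected ORC entry size");
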
*/ #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) extern bool __virt_addr_valid(unsigned long kaddr); #define virt_addr_valid(kaddr) __virt_addr_valid((unsigned long) (kaddr)) -#endif /* __ASSEMBLY__ */ +static __always_inline void *pfn_to_kaddr(unsigned long pfn) +{ + return __va(pfn << PAGE_SHIFT); +} + +static __always_inline u64 __canonical_address(u64 vaddr, u8 vaddr_bits) +{ + return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits); +} + +static __always_inline u64 __is_canonical_address(u64 vaddr, u8 vaddr_bits) +{ + return __canonical_address(vaddr, vaddr_bits) == vaddr; +} + +#endif /* __ASSEMBLER__ */ #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h index 94dbd51df58f..0c623706cb7e 100644 --- a/arch/x86/include/asm/page_32.h +++ b/arch/x86/include/asm/page_32.h @@ -4,7 +4,7 @@ #include <asm/page_32_types.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __phys_addr_nodebug(x) ((x) - PAGE_OFFSET) #ifdef CONFIG_DEBUG_VIRTUAL @@ -15,23 +15,6 @@ extern unsigned long __phys_addr(unsigned long); #define __phys_addr_symbol(x) __phys_addr(x) #define __phys_reloc_hide(x) RELOC_HIDE((x), 0) -#ifdef CONFIG_FLATMEM -#define pfn_valid(pfn) ((pfn) < max_mapnr) -#endif /* CONFIG_FLATMEM */ - -#ifdef CONFIG_X86_USE_3DNOW -#include <asm/mmx.h> - -static inline void clear_page(void *page) -{ - mmx_clear_page(page); -} - -static inline void copy_page(void *to, void *from) -{ - mmx_copy_page(to, from); -} -#else /* !CONFIG_X86_USE_3DNOW */ #include <linux/string.h> static inline void clear_page(void *page) @@ -43,7 +26,6 @@ static inline void copy_page(void *to, void *from) { memcpy(to, from, PAGE_SIZE); } -#endif /* CONFIG_X86_3DNOW */ -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PAGE_32_H */ diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index 0d5c739eebd7..623f1e9f493e 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -11,8 +11,8 @@ * a virtual address space of one gigabyte, which limits the * amount of physical memory you can use to about 950MB. * - * If you want more physical memory than this then see the CONFIG_HIGHMEM4G - * and CONFIG_HIGHMEM64G options in the kernel configuration. + * If you want more physical memory than this then see the CONFIG_VMSPLIT_2G + * and CONFIG_HIGHMEM4G options in the kernel configuration. */ #define __PAGE_OFFSET_BASE _AC(CONFIG_PAGE_OFFSET, UL) #define __PAGE_OFFSET __PAGE_OFFSET_BASE @@ -22,11 +22,9 @@ #define THREAD_SIZE_ORDER 1 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) -#define DOUBLEFAULT_STACK 1 -#define NMI_STACK 0 -#define DEBUG_STACK 0 -#define MCE_STACK 0 -#define N_EXCEPTION_STACKS 1 +#define IRQ_STACK_SIZE THREAD_SIZE + +#define N_EXCEPTION_STACKS 1 #ifdef CONFIG_X86_PAE /* @@ -44,11 +42,28 @@ #endif /* CONFIG_X86_PAE */ /* - * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S) + * User space process size: 3GB (default). + */ +#define IA32_PAGE_OFFSET __PAGE_OFFSET +#define TASK_SIZE __PAGE_OFFSET +#define TASK_SIZE_LOW TASK_SIZE +#define TASK_SIZE_MAX TASK_SIZE +#define DEFAULT_MAP_WINDOW TASK_SIZE +#define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX STACK_TOP + +/* + * In spite of the name, KERNEL_IMAGE_SIZE is a limit on the maximum virtual + * address for the kernel image, rather than the limit on the size itself. 
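
__canonical_address() above is a sign extension from bit (vaddr_bits - 1). With 48-bit virtual addresses (4-level paging) the shifts work out as follows; canonical_examples() is purely illustrative:

#include <linux/types.h>
#include <asm/page.h>

static void canonical_examples(void)
{
	/* 64 - 48 = 16: shift the tag bits out, arithmetic-shift back in */
	u64 a = __canonical_address(0x0000ffffffffffffULL, 48);
	/* a == 0xffffffffffffffffULL: bit 47 was set, bits 48..63 follow it */

	u64 b = __is_canonical_address(0x00007fffffffffffULL, 48);
	/* b == 1: highest user address, already canonical */

	u64 c = __is_canonical_address(0x0000800000000000ULL, 48);
	/* c == 0: bit 47 set while 48..63 are clear -> non-canonical hole */

	(void)a; (void)b; (void)c;
}
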
On + * 32-bit, this is not a strict limit, but this value is used to limit the + * link-time virtual address range of the kernel, and by KASLR to limit the + * randomized address from which the kernel is executed. A relocatable kernel + * can be loaded somewhat higher than KERNEL_IMAGE_SIZE as long as enough space + * remains for the vmalloc area. */ #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This much address space is reserved for vmalloc() and iomap() @@ -58,8 +73,7 @@ extern unsigned int __VMALLOC_RESERVE; extern int sysctl_legacy_va_layout; extern void find_low_pfn_range(void); -extern void setup_bootmem_allocator(void); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PAGE_32_DEFS_H */ diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 939b1cff4a7b..2f0e47be79a4 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -4,9 +4,13 @@ #include <asm/page_64_types.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ +#include <asm/cpufeatures.h> #include <asm/alternative.h> +#include <linux/kmsan-checks.h> +#include <linux/mmdebug.h> + /* duplicated to the one in bootmem.h */ extern unsigned long max_pfn; extern unsigned long phys_base; @@ -14,8 +18,9 @@ extern unsigned long phys_base; extern unsigned long page_offset_base; extern unsigned long vmalloc_base; extern unsigned long vmemmap_base; +extern unsigned long direct_map_physmem_end; -static inline unsigned long __phys_addr_nodebug(unsigned long x) +static __always_inline unsigned long __phys_addr_nodebug(unsigned long x) { unsigned long y = x - __START_KERNEL_map; @@ -27,36 +32,79 @@ static inline unsigned long __phys_addr_nodebug(unsigned long x) #ifdef CONFIG_DEBUG_VIRTUAL extern unsigned long __phys_addr(unsigned long); -extern unsigned long __phys_addr_symbol(unsigned long); #else #define __phys_addr(x) __phys_addr_nodebug(x) -#define __phys_addr_symbol(x) \ - ((unsigned long)(x) - __START_KERNEL_map + phys_base) #endif -#define __phys_reloc_hide(x) (x) +static inline unsigned long __phys_addr_symbol(unsigned long x) +{ + unsigned long y = x - __START_KERNEL_map; -#ifdef CONFIG_FLATMEM -#define pfn_valid(pfn) ((pfn) < max_pfn) -#endif + /* only check upper bounds since lower bounds will trigger carry */ + VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); + + return y + phys_base; +} + +#define __phys_reloc_hide(x) (x) void clear_page_orig(void *page); void clear_page_rep(void *page); void clear_page_erms(void *page); +KCFI_REFERENCE(clear_page_orig); +KCFI_REFERENCE(clear_page_rep); +KCFI_REFERENCE(clear_page_erms); static inline void clear_page(void *page) { + /* + * Clean up KMSAN metadata for the page being cleared. The assembly call + * below clobbers @page, so we perform unpoisoning before it. + */ + kmsan_unpoison_memory(page, PAGE_SIZE); alternative_call_2(clear_page_orig, clear_page_rep, X86_FEATURE_REP_GOOD, clear_page_erms, X86_FEATURE_ERMS, "=D" (page), - "0" (page) - : "cc", "memory", "rax", "rcx"); + "D" (page), + "cc", "memory", "rax", "rcx"); } void copy_page(void *to, void *from); +KCFI_REFERENCE(copy_page); + +/* + * User space process size. This is the first address outside the user range. + * There are a few constraints that determine this: + * + * On Intel CPUs, if a SYSCALL instruction is at the highest canonical + * address, then that syscall will enter the kernel with a + * non-canonical return address, and SYSRET will explode dangerously. 
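
The alternative_call_2() in clear_page() above patches the call target at boot; the effective behavior, with the feature tests written out for illustration, is roughly:

#include <asm/page.h>
#include <asm/cpufeature.h>
#include <linux/kmsan-checks.h>

static inline void clear_page_sketch(void *page)
{
	/* must precede the call: the patched asm clobbers @page */
	kmsan_unpoison_memory(page, PAGE_SIZE);

	if (cpu_feature_enabled(X86_FEATURE_ERMS))
		clear_page_erms(page);		/* rep stosb */
	else if (cpu_feature_enabled(X86_FEATURE_REP_GOOD))
		clear_page_rep(page);		/* rep stosq */
	else
		clear_page_orig(page);		/* unrolled stores */
}
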
+ * We avoid this particular problem by preventing anything + * from being mapped at the maximum canonical address. + * + * On AMD CPUs in the Ryzen family, there's a nasty bug in which the + * CPUs malfunction if they execute code from the highest canonical page. + * They'll speculate right off the end of the canonical space, and + * bad things happen. This is worked around in the same way as the + * Intel problem. + * + * With page table isolation enabled, we map the LDT in ... [stay tuned] + */ +static __always_inline unsigned long task_size_max(void) +{ + unsigned long ret; + + alternative_io("movq %[small],%0","movq %[large],%0", + X86_FEATURE_LA57, + "=r" (ret), + [small] "i" ((1ul << 47)-PAGE_SIZE), + [large] "i" ((1ul << 56)-PAGE_SIZE)); + + return ret; +} -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #ifdef CONFIG_X86_VSYSCALL_EMULATION # define __HAVE_ARCH_GATE_AREA 1 diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 8f657286d599..7400dab373fe 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_PAGE_64_DEFS_H #define _ASM_X86_PAGE_64_DEFS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/kaslr.h> #endif @@ -14,22 +14,21 @@ #define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER) #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) -#define CURRENT_MASK (~(THREAD_SIZE - 1)) -#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER) +#define EXCEPTION_STACK_ORDER (1 + KASAN_STACK_ORDER) #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) -#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) -#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) - #define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER) #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) -#define DOUBLEFAULT_STACK 1 -#define NMI_STACK 2 -#define DEBUG_STACK 3 -#define MCE_STACK 4 -#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */ +/* + * The index for the tss.ist[] array. The hardware limit is 7 entries. + */ +#define IST_INDEX_DF 0 +#define IST_INDEX_NMI 1 +#define IST_INDEX_DB 2 +#define IST_INDEX_MCE 3 +#define IST_INDEX_VC 4 /* * Set __PAGE_OFFSET to the most negative possible address + @@ -42,27 +41,39 @@ #define __PAGE_OFFSET_BASE_L5 _AC(0xff11000000000000, UL) #define __PAGE_OFFSET_BASE_L4 _AC(0xffff888000000000, UL) -#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT #define __PAGE_OFFSET page_offset_base -#else -#define __PAGE_OFFSET __PAGE_OFFSET_BASE_L4 -#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ #define __START_KERNEL_map _AC(0xffffffff80000000, UL) -/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ +/* See Documentation/arch/x86/x86_64/mm.rst for a description of the memory map. */ #define __PHYSICAL_MASK_SHIFT 52 - -#ifdef CONFIG_X86_5LEVEL #define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled() ? 56 : 47) -#else -#define __VIRTUAL_MASK_SHIFT 47 -#endif + +#define TASK_SIZE_MAX task_size_max() +#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE) + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ + 0xc0000000 : 0xFFFFe000) + +#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \ + IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW) +#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \ + IA32_PAGE_OFFSET : TASK_SIZE_MAX) +#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? 
\ + IA32_PAGE_OFFSET : TASK_SIZE_MAX) + +#define STACK_TOP TASK_SIZE_LOW +#define STACK_TOP_MAX TASK_SIZE_MAX /* - * Maximum kernel image size is limited to 1 GiB, due to the fixmap living - * in the next 1 GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). + * In spite of the name, KERNEL_IMAGE_SIZE is a limit on the maximum virtual + * address for the kernel image, rather than the limit on the size itself. + * This can be at most 1 GiB, due to the fixmap living in the next 1 GiB (see + * level2_kernel_pgt in arch/x86/kernel/head_64.S). * * On KASLR use 1 GiB by default, leaving 1 GiB for modules once the * page tables are fully set up. diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index c85e15010f48..018a8d906ca3 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -6,25 +6,16 @@ #include <linux/types.h> #include <linux/mem_encrypt.h> -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 -#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) - -#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) -#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) - -#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) -#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) +#include <vdso/page.h> #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) -/* Cast *PAGE_MASK to a signed type so that it is sign-extended if +/* Cast P*D_MASK to a signed type so that it is sign-extended if virtual addresses are 32-bits but physical addresses are larger (ie, 32-bit PAE). */ #define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) -#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_PAGE_MASK) & __PHYSICAL_MASK) -#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK) +#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_MASK) & __PHYSICAL_MASK) +#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_MASK) & __PHYSICAL_MASK) #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) @@ -35,14 +26,12 @@ #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) -#define VM_DATA_DEFAULT_FLAGS \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC -#define __PHYSICAL_START ALIGN(CONFIG_PHYSICAL_START, \ - CONFIG_PHYSICAL_ALIGN) +/* Physical address where kernel should be loaded. 
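
LOAD_PHYSICAL_ADDR above rounds CONFIG_PHYSICAL_START up to the CONFIG_PHYSICAL_ALIGN boundary via the usual mask trick. Worked out with the common x86-64 defaults (an assumption; configs vary):

/* as defined in uapi/linux/const.h */
#define __ALIGN_KERNEL_MASK(x, mask)	(((x) + (mask)) & ~(mask))

/* CONFIG_PHYSICAL_START=0x1000000, CONFIG_PHYSICAL_ALIGN=0x200000 (2 MiB) */
static const unsigned long load_pa =
	__ALIGN_KERNEL_MASK(0x1000000UL, 0x200000UL - 1);  /* 0x1000000, already aligned */
static const unsigned long load_pa_bumped =
	__ALIGN_KERNEL_MASK(0x1080000UL, 0x200000UL - 1);  /* rounds up to 0x1200000 */
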
*/ +#define LOAD_PHYSICAL_ADDR __ALIGN_KERNEL_MASK(CONFIG_PHYSICAL_START, CONFIG_PHYSICAL_ALIGN - 1) -#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) +#define __START_KERNEL (__START_KERNEL_map + LOAD_PHYSICAL_ADDR) #ifdef CONFIG_X86_64 #include <asm/page_64_types.h> @@ -52,7 +41,7 @@ #define IOREMAP_MAX_ORDER (PMD_SHIFT) #endif /* CONFIG_X86_64 */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_DYNAMIC_PHYSICAL_MASK extern phys_addr_t physical_mask; @@ -73,11 +62,8 @@ static inline phys_addr_t get_max_mapped(void) bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn); -extern unsigned long init_memory_mapping(unsigned long start, - unsigned long end); - extern void initmem_init(void); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PAGE_DEFS_H */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index a97f28d914d5..b5e59a7ba0d0 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -4,22 +4,35 @@ /* Various instructions on x86 need to be replaced for * para-virtualization: those hooks are defined here. */ +#include <asm/paravirt_types.h> + +#ifndef __ASSEMBLER__ +struct mm_struct; +#endif + #ifdef CONFIG_PARAVIRT #include <asm/pgtable_types.h> #include <asm/asm.h> #include <asm/nospec-branch.h> -#include <asm/paravirt_types.h> - -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/bug.h> #include <linux/types.h> #include <linux/cpumask.h> +#include <linux/static_call_types.h> #include <asm/frame.h> -static inline unsigned long long paravirt_sched_clock(void) +u64 dummy_steal_clock(int cpu); +u64 dummy_sched_clock(void); + +DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); +DECLARE_STATIC_CALL(pv_sched_clock, dummy_sched_clock); + +void paravirt_set_sched_clock(u64 (*func)(void)); + +static __always_inline u64 paravirt_sched_clock(void) { - return PVOP_CALL0(unsigned long long, time.sched_clock); + return static_call(pv_sched_clock)(); } struct static_key; @@ -33,21 +46,31 @@ bool pv_is_native_vcpu_is_preempted(void); static inline u64 paravirt_steal_clock(int cpu) { - return PVOP_CALL1(u64, time.steal_clock, cpu); + return static_call(pv_steal_clock)(cpu); } +#ifdef CONFIG_PARAVIRT_SPINLOCKS +void __init paravirt_set_cap(void); +#endif + /* The paravirtualized I/O functions */ static inline void slow_down_io(void) { - pv_ops.cpu.io_delay(); + PVOP_VCALL0(cpu.io_delay); #ifdef REALLY_SLOW_IO - pv_ops.cpu.io_delay(); - pv_ops.cpu.io_delay(); - pv_ops.cpu.io_delay(); + PVOP_VCALL0(cpu.io_delay); + PVOP_VCALL0(cpu.io_delay); + PVOP_VCALL0(cpu.io_delay); #endif } -static inline void __flush_tlb(void) +void native_flush_tlb_local(void); +void native_flush_tlb_global(void); +void native_flush_tlb_one_user(unsigned long addr); +void native_flush_tlb_multi(const struct cpumask *cpumask, + const struct flush_tlb_info *info); + +static inline void __flush_tlb_local(void) { PVOP_VCALL0(mmu.flush_tlb_user); } @@ -62,20 +85,31 @@ static inline void __flush_tlb_one_user(unsigned long addr) PVOP_VCALL1(mmu.flush_tlb_one_user, addr); } -static inline void flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info) +static inline void __flush_tlb_multi(const struct cpumask *cpumask, + const struct flush_tlb_info *info) { - PVOP_VCALL2(mmu.flush_tlb_others, cpumask, info); + PVOP_VCALL2(mmu.flush_tlb_multi, cpumask, info); } -static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) +static inline void 
paravirt_arch_exit_mmap(struct mm_struct *mm) { - PVOP_VCALL2(mmu.tlb_remove_table, tlb, table); + PVOP_VCALL1(mmu.exit_mmap, mm); } -static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) +static inline void notify_page_enc_status_changed(unsigned long pfn, + int npages, bool enc) { - PVOP_VCALL1(mmu.exit_mmap, mm); + PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc); +} + +static __always_inline void arch_safe_halt(void) +{ + PVOP_VCALL0(irq.safe_halt); +} + +static inline void halt(void) +{ + PVOP_VCALL0(irq.halt); } #ifdef CONFIG_PARAVIRT_XXL @@ -94,12 +128,12 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, /* * These special macros can be used to get or set a debugging register */ -static inline unsigned long paravirt_get_debugreg(int reg) +static __always_inline unsigned long paravirt_get_debugreg(int reg) { return PVOP_CALL1(unsigned long, cpu.get_debugreg, reg); } #define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) -static inline void set_debugreg(unsigned long val, int reg) +static __always_inline void set_debugreg(unsigned long val, int reg) { PVOP_VCALL2(cpu.set_debugreg, reg, val); } @@ -114,24 +148,26 @@ static inline void write_cr0(unsigned long x) PVOP_VCALL1(cpu.write_cr0, x); } -static inline unsigned long read_cr2(void) +static __always_inline unsigned long read_cr2(void) { - return PVOP_CALL0(unsigned long, mmu.read_cr2); + return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2, + "mov %%cr2, %%rax;", ALT_NOT_XEN); } -static inline void write_cr2(unsigned long x) +static __always_inline void write_cr2(unsigned long x) { PVOP_VCALL1(mmu.write_cr2, x); } static inline unsigned long __read_cr3(void) { - return PVOP_CALL0(unsigned long, mmu.read_cr3); + return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3, + "mov %%cr3, %%rax;", ALT_NOT_XEN); } static inline void write_cr3(unsigned long x) { - PVOP_VCALL1(mmu.write_cr3, x); + PVOP_ALT_VCALL1(mmu.write_cr3, x, "mov %%rdi, %%cr3", ALT_NOT_XEN); } static inline void __write_cr4(unsigned long x) @@ -139,55 +175,24 @@ static inline void __write_cr4(unsigned long x) PVOP_VCALL1(cpu.write_cr4, x); } -#ifdef CONFIG_X86_64 -static inline unsigned long read_cr8(void) -{ - return PVOP_CALL0(unsigned long, cpu.read_cr8); -} - -static inline void write_cr8(unsigned long x) -{ - PVOP_VCALL1(cpu.write_cr8, x); -} -#endif - -static inline void arch_safe_halt(void) -{ - PVOP_VCALL0(irq.safe_halt); -} - -static inline void halt(void) -{ - PVOP_VCALL0(irq.halt); -} - -static inline void wbinvd(void) -{ - PVOP_VCALL0(cpu.wbinvd); -} - -#define get_kernel_rpl() (pv_info.kernel_rpl) - -static inline u64 paravirt_read_msr(unsigned msr) +static inline u64 paravirt_read_msr(u32 msr) { return PVOP_CALL1(u64, cpu.read_msr, msr); } -static inline void paravirt_write_msr(unsigned msr, - unsigned low, unsigned high) +static inline void paravirt_write_msr(u32 msr, u64 val) { - PVOP_VCALL3(cpu.write_msr, msr, low, high); + PVOP_VCALL2(cpu.write_msr, msr, val); } -static inline u64 paravirt_read_msr_safe(unsigned msr, int *err) +static inline int paravirt_read_msr_safe(u32 msr, u64 *val) { - return PVOP_CALL2(u64, cpu.read_msr_safe, msr, err); + return PVOP_CALL2(int, cpu.read_msr_safe, msr, val); } -static inline int paravirt_write_msr_safe(unsigned msr, - unsigned low, unsigned high) +static inline int paravirt_write_msr_safe(u32 msr, u64 val) { - return PVOP_CALL3(int, cpu.write_msr_safe, msr, low, high); + return PVOP_CALL2(int, cpu.write_msr_safe, msr, val); } #define rdmsr(msr, val1, val2) \ @@ 
-197,55 +202,46 @@ do { \ val2 = _l >> 32; \ } while (0) -#define wrmsr(msr, val1, val2) \ -do { \ - paravirt_write_msr(msr, val1, val2); \ -} while (0) +static __always_inline void wrmsr(u32 msr, u32 low, u32 high) +{ + paravirt_write_msr(msr, (u64)high << 32 | low); +} -#define rdmsrl(msr, val) \ +#define rdmsrq(msr, val) \ do { \ val = paravirt_read_msr(msr); \ } while (0) -static inline void wrmsrl(unsigned msr, u64 val) +static inline void wrmsrq(u32 msr, u64 val) { - wrmsr(msr, (u32)val, (u32)(val>>32)); + paravirt_write_msr(msr, val); } -#define wrmsr_safe(msr, a, b) paravirt_write_msr_safe(msr, a, b) +static inline int wrmsrq_safe(u32 msr, u64 val) +{ + return paravirt_write_msr_safe(msr, val); +} /* rdmsr with exception handling */ #define rdmsr_safe(msr, a, b) \ ({ \ - int _err; \ - u64 _l = paravirt_read_msr_safe(msr, &_err); \ + u64 _l; \ + int _err = paravirt_read_msr_safe((msr), &_l); \ (*a) = (u32)_l; \ - (*b) = _l >> 32; \ + (*b) = (u32)(_l >> 32); \ _err; \ }) -static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) +static __always_inline int rdmsrq_safe(u32 msr, u64 *p) { - int err; - - *p = paravirt_read_msr_safe(msr, &err); - return err; + return paravirt_read_msr_safe(msr, p); } -static inline unsigned long long paravirt_read_pmc(int counter) +static __always_inline u64 rdpmc(int counter) { return PVOP_CALL1(u64, cpu.read_pmc, counter); } -#define rdpmc(counter, low, high) \ -do { \ - u64 _l = paravirt_read_pmc(counter); \ - low = (u32)_l; \ - high = _l >> 32; \ -} while (0) - -#define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter)) - static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) { PVOP_VCALL2(cpu.alloc_ldt, ldt, entries); @@ -283,12 +279,10 @@ static inline void load_TLS(struct thread_struct *t, unsigned cpu) PVOP_VCALL2(cpu.load_tls, t, cpu); } -#ifdef CONFIG_X86_64 static inline void load_gs_index(unsigned int gs) { PVOP_VCALL1(cpu.load_gs_index, gs); } -#endif static inline void write_ldt_entry(struct desc_struct *dt, int entry, const void *desc) @@ -306,21 +300,22 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) { PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g); } -static inline void set_iopl_mask(unsigned mask) + +#ifdef CONFIG_X86_IOPL_IOPERM +static inline void tss_invalidate_io_bitmap(void) { - PVOP_VCALL1(cpu.set_iopl_mask, mask); + PVOP_VCALL0(cpu.invalidate_io_bitmap); } -static inline void paravirt_activate_mm(struct mm_struct *prev, - struct mm_struct *next) +static inline void tss_update_io_bitmap(void) { - PVOP_VCALL2(mmu.activate_mm, prev, next); + PVOP_VCALL0(cpu.update_io_bitmap); } +#endif -static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline void paravirt_enter_mmap(struct mm_struct *next) { - PVOP_VCALL2(mmu.dup_mmap, oldmm, mm); + PVOP_VCALL1(mmu.enter_mmap, next); } static inline int paravirt_pgd_alloc(struct mm_struct *mm) @@ -373,157 +368,92 @@ static inline void paravirt_release_p4d(unsigned long pfn) static inline pte_t __pte(pteval_t val) { - pteval_t ret; - - if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pteval_t, mmu.make_pte, val, (u64)val >> 32); - else - ret = PVOP_CALLEE1(pteval_t, mmu.make_pte, val); - - return (pte_t) { .pte = ret }; + return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val, + "mov %%rdi, %%rax", ALT_NOT_XEN) }; } static inline pteval_t pte_val(pte_t pte) { - pteval_t ret; - - if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pteval_t, mmu.pte_val, 
- pte.pte, (u64)pte.pte >> 32); - else - ret = PVOP_CALLEE1(pteval_t, mmu.pte_val, pte.pte); - - return ret; + return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte, + "mov %%rdi, %%rax", ALT_NOT_XEN); } static inline pgd_t __pgd(pgdval_t val) { - pgdval_t ret; - - if (sizeof(pgdval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pgdval_t, mmu.make_pgd, val, (u64)val >> 32); - else - ret = PVOP_CALLEE1(pgdval_t, mmu.make_pgd, val); - - return (pgd_t) { ret }; + return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val, + "mov %%rdi, %%rax", ALT_NOT_XEN) }; } static inline pgdval_t pgd_val(pgd_t pgd) { - pgdval_t ret; - - if (sizeof(pgdval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pgdval_t, mmu.pgd_val, - pgd.pgd, (u64)pgd.pgd >> 32); - else - ret = PVOP_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd); - - return ret; + return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd, + "mov %%rdi, %%rax", ALT_NOT_XEN); } #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION -static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, +static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pteval_t ret; - ret = PVOP_CALL3(pteval_t, mmu.ptep_modify_prot_start, mm, addr, ptep); + ret = PVOP_CALL3(pteval_t, mmu.ptep_modify_prot_start, vma, addr, ptep); return (pte_t) { .pte = ret }; } -static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) +static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep, pte_t old_pte, pte_t pte) { - if (sizeof(pteval_t) > sizeof(long)) - /* 5 arg words */ - pv_ops.mmu.ptep_modify_prot_commit(mm, addr, ptep, pte); - else - PVOP_VCALL4(mmu.ptep_modify_prot_commit, - mm, addr, ptep, pte.pte); -} -static inline void set_pte(pte_t *ptep, pte_t pte) -{ - if (sizeof(pteval_t) > sizeof(long)) - PVOP_VCALL3(mmu.set_pte, ptep, pte.pte, (u64)pte.pte >> 32); - else - PVOP_VCALL2(mmu.set_pte, ptep, pte.pte); + PVOP_VCALL4(mmu.ptep_modify_prot_commit, vma, addr, ptep, pte.pte); } -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) +static inline void set_pte(pte_t *ptep, pte_t pte) { - if (sizeof(pteval_t) > sizeof(long)) - /* 5 arg words */ - pv_ops.mmu.set_pte_at(mm, addr, ptep, pte); - else - PVOP_VCALL4(mmu.set_pte_at, mm, addr, ptep, pte.pte); + PVOP_VCALL2(mmu.set_pte, ptep, pte.pte); } static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { - pmdval_t val = native_pmd_val(pmd); - - if (sizeof(pmdval_t) > sizeof(long)) - PVOP_VCALL3(mmu.set_pmd, pmdp, val, (u64)val >> 32); - else - PVOP_VCALL2(mmu.set_pmd, pmdp, val); + PVOP_VCALL2(mmu.set_pmd, pmdp, native_pmd_val(pmd)); } -#if CONFIG_PGTABLE_LEVELS >= 3 static inline pmd_t __pmd(pmdval_t val) { - pmdval_t ret; - - if (sizeof(pmdval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pmdval_t, mmu.make_pmd, val, (u64)val >> 32); - else - ret = PVOP_CALLEE1(pmdval_t, mmu.make_pmd, val); - - return (pmd_t) { ret }; + return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val, + "mov %%rdi, %%rax", ALT_NOT_XEN) }; } static inline pmdval_t pmd_val(pmd_t pmd) { - pmdval_t ret; - - if (sizeof(pmdval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pmdval_t, mmu.pmd_val, - pmd.pmd, (u64)pmd.pmd >> 32); - else - ret = PVOP_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd); - - return ret; + return PVOP_ALT_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd, + "mov %%rdi, %%rax", ALT_NOT_XEN); } static inline void set_pud(pud_t *pudp, pud_t pud) { - pudval_t val = 
native_pud_val(pud); - - if (sizeof(pudval_t) > sizeof(long)) - PVOP_VCALL3(mmu.set_pud, pudp, val, (u64)val >> 32); - else - PVOP_VCALL2(mmu.set_pud, pudp, val); + PVOP_VCALL2(mmu.set_pud, pudp, native_pud_val(pud)); } -#if CONFIG_PGTABLE_LEVELS >= 4 + static inline pud_t __pud(pudval_t val) { pudval_t ret; - ret = PVOP_CALLEE1(pudval_t, mmu.make_pud, val); + ret = PVOP_ALT_CALLEE1(pudval_t, mmu.make_pud, val, + "mov %%rdi, %%rax", ALT_NOT_XEN); return (pud_t) { ret }; } static inline pudval_t pud_val(pud_t pud) { - return PVOP_CALLEE1(pudval_t, mmu.pud_val, pud.pud); + return PVOP_ALT_CALLEE1(pudval_t, mmu.pud_val, pud.pud, + "mov %%rdi, %%rax", ALT_NOT_XEN); } static inline void pud_clear(pud_t *pudp) { - set_pud(pudp, __pud(0)); + set_pud(pudp, native_make_pud(0)); } static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) @@ -533,18 +463,18 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) PVOP_VCALL2(mmu.set_p4d, p4dp, val); } -#if CONFIG_PGTABLE_LEVELS >= 5 - static inline p4d_t __p4d(p4dval_t val) { - p4dval_t ret = PVOP_CALLEE1(p4dval_t, mmu.make_p4d, val); + p4dval_t ret = PVOP_ALT_CALLEE1(p4dval_t, mmu.make_p4d, val, + "mov %%rdi, %%rax", ALT_NOT_XEN); return (p4d_t) { ret }; } static inline p4dval_t p4d_val(p4d_t p4d) { - return PVOP_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d); + return PVOP_ALT_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d, + "mov %%rdi, %%rax", ALT_NOT_XEN); } static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd) @@ -560,40 +490,15 @@ static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd) } while (0) #define pgd_clear(pgdp) do { \ - if (pgtable_l5_enabled()) \ - set_pgd(pgdp, __pgd(0)); \ + if (pgtable_l5_enabled()) \ + set_pgd(pgdp, native_make_pgd(0)); \ } while (0) -#endif /* CONFIG_PGTABLE_LEVELS == 5 */ - static inline void p4d_clear(p4d_t *p4dp) { - set_p4d(p4dp, __p4d(0)); + set_p4d(p4dp, native_make_p4d(0)); } -#endif /* CONFIG_PGTABLE_LEVELS == 4 */ - -#endif /* CONFIG_PGTABLE_LEVELS >= 3 */ - -#ifdef CONFIG_X86_PAE -/* Special-case pte-setting operations for PAE, which can't update a - 64-bit pte atomically */ -static inline void set_pte_atomic(pte_t *ptep, pte_t pte) -{ - PVOP_VCALL3(mmu.set_pte_atomic, ptep, pte.pte, pte.pte >> 32); -} - -static inline void pte_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - PVOP_VCALL3(mmu.pte_clear, mm, addr, ptep); -} - -static inline void pmd_clear(pmd_t *pmdp) -{ - PVOP_VCALL1(mmu.pmd_clear, pmdp); -} -#else /* !CONFIG_X86_PAE */ static inline void set_pte_atomic(pte_t *ptep, pte_t pte) { set_pte(ptep, pte); @@ -602,14 +507,13 @@ static inline void set_pte_atomic(pte_t *ptep, pte_t pte) static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - set_pte_at(mm, addr, ptep, __pte(0)); + set_pte(ptep, native_make_pte(0)); } static inline void pmd_clear(pmd_t *pmdp) { - set_pmd(pmdp, __pmd(0)); + set_pmd(pmdp, native_make_pmd(0)); } -#endif /* CONFIG_X86_PAE */ #define __HAVE_ARCH_START_CONTEXT_SWITCH static inline void arch_start_context_switch(struct task_struct *prev) @@ -655,7 +559,9 @@ static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock) { - PVOP_VCALLEE1(lock.queued_spin_unlock, lock); + PVOP_ALT_VCALLEE1(lock.queued_spin_unlock, lock, + "movb $0, (%%" _ASM_ARG1 ");", + ALT_NOT(X86_FEATURE_PVUNLOCK)); } static __always_inline void pv_wait(u8 *ptr, u8 val) @@ -670,7 +576,9 @@ static __always_inline void pv_kick(int cpu) static __always_inline bool pv_vcpu_is_preempted(long 
cpu) { - return PVOP_CALLEE1(bool, lock.vcpu_is_preempted, cpu); + return PVOP_ALT_CALLEE1(bool, lock.vcpu_is_preempted, cpu, + "xor %%" _ASM_AX ", %%" _ASM_AX ";", + ALT_NOT(X86_FEATURE_VCPUPREEMPT)); } void __raw_callee_save___native_queued_spin_unlock(struct qspinlock *lock); @@ -679,16 +587,9 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); #endif /* SMP && PARAVIRT_SPINLOCKS */ #ifdef CONFIG_X86_32 -#define PV_SAVE_REGS "pushl %ecx; pushl %edx;" -#define PV_RESTORE_REGS "popl %edx; popl %ecx;" - /* save and restore all caller-save registers, except return value */ #define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;" #define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;" - -#define PV_FLAGS_ARG "0" -#define PV_EXTRA_CLOBBERS -#define PV_VEXTRA_CLOBBERS #else /* save and restore all caller-save registers, except return value */ #define PV_SAVE_ALL_CALLER_REGS \ @@ -709,14 +610,6 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); "pop %rsi;" \ "pop %rdx;" \ "pop %rcx;" - -/* We save some registers, but all of them, that's too much. We clobber all - * caller saved registers but the argument parameter */ -#define PV_SAVE_REGS "pushq %%rdi;" -#define PV_RESTORE_REGS "popq %%rdi;" -#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx", "rsi" -#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx", "rsi" -#define PV_FLAGS_ARG "D" #endif /* @@ -732,21 +625,27 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); * functions. */ #define PV_THUNK_NAME(func) "__raw_callee_save_" #func -#define PV_CALLEE_SAVE_REGS_THUNK(func) \ +#define __PV_CALLEE_SAVE_REGS_THUNK(func, section) \ extern typeof(func) __raw_callee_save_##func; \ \ - asm(".pushsection .text;" \ + asm(".pushsection " section ", \"ax\";" \ ".globl " PV_THUNK_NAME(func) ";" \ ".type " PV_THUNK_NAME(func) ", @function;" \ + ASM_FUNC_ALIGN \ PV_THUNK_NAME(func) ":" \ + ASM_ENDBR \ FRAME_BEGIN \ PV_SAVE_ALL_CALLER_REGS \ "call " #func ";" \ PV_RESTORE_ALL_CALLER_REGS \ FRAME_END \ - "ret;" \ + ASM_RET \ + ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \ ".popsection") +#define PV_CALLEE_SAVE_REGS_THUNK(func) \ + __PV_CALLEE_SAVE_REGS_THUNK(func, ".text") + /* Get a reference to a callee-save function */ #define PV_CALLEE_SAVE(func) \ ((struct paravirt_callee_save) { __raw_callee_save_##func }) @@ -756,27 +655,23 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); ((struct paravirt_callee_save) { func }) #ifdef CONFIG_PARAVIRT_XXL -static inline notrace unsigned long arch_local_save_flags(void) +static __always_inline unsigned long arch_local_save_flags(void) { - return PVOP_CALLEE0(unsigned long, irq.save_fl); + return PVOP_ALT_CALLEE0(unsigned long, irq.save_fl, "pushf; pop %%rax;", + ALT_NOT_XEN); } -static inline notrace void arch_local_irq_restore(unsigned long f) +static __always_inline void arch_local_irq_disable(void) { - PVOP_VCALLEE1(irq.restore_fl, f); + PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT_XEN); } -static inline notrace void arch_local_irq_disable(void) +static __always_inline void arch_local_irq_enable(void) { - PVOP_VCALLEE0(irq.irq_disable); + PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT_XEN); } -static inline notrace void arch_local_irq_enable(void) -{ - PVOP_VCALLEE0(irq.irq_enable); -} - -static inline notrace unsigned long arch_local_irq_save(void) +static __always_inline unsigned long arch_local_irq_save(void) { unsigned long f; @@ -803,144 +698,42 @@ static inline notrace unsigned long arch_local_irq_save(void) #undef PVOP_CALL4 extern void 
default_banner(void); +void native_pv_lock_init(void) __init; -#else /* __ASSEMBLY__ */ - -#define _PVSITE(ptype, ops, word, algn) \ -771:; \ - ops; \ -772:; \ - .pushsection .parainstructions,"a"; \ - .align algn; \ - word 771b; \ - .byte ptype; \ - .byte 772b-771b; \ - .popsection - - -#define COND_PUSH(set, mask, reg) \ - .if ((~(set)) & mask); push %reg; .endif -#define COND_POP(set, mask, reg) \ - .if ((~(set)) & mask); pop %reg; .endif - -#ifdef CONFIG_X86_64 - -#define PV_SAVE_REGS(set) \ - COND_PUSH(set, CLBR_RAX, rax); \ - COND_PUSH(set, CLBR_RCX, rcx); \ - COND_PUSH(set, CLBR_RDX, rdx); \ - COND_PUSH(set, CLBR_RSI, rsi); \ - COND_PUSH(set, CLBR_RDI, rdi); \ - COND_PUSH(set, CLBR_R8, r8); \ - COND_PUSH(set, CLBR_R9, r9); \ - COND_PUSH(set, CLBR_R10, r10); \ - COND_PUSH(set, CLBR_R11, r11) -#define PV_RESTORE_REGS(set) \ - COND_POP(set, CLBR_R11, r11); \ - COND_POP(set, CLBR_R10, r10); \ - COND_POP(set, CLBR_R9, r9); \ - COND_POP(set, CLBR_R8, r8); \ - COND_POP(set, CLBR_RDI, rdi); \ - COND_POP(set, CLBR_RSI, rsi); \ - COND_POP(set, CLBR_RDX, rdx); \ - COND_POP(set, CLBR_RCX, rcx); \ - COND_POP(set, CLBR_RAX, rax) - -#define PARA_PATCH(off) ((off) / 8) -#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .quad, 8) -#define PARA_INDIRECT(addr) *addr(%rip) -#else -#define PV_SAVE_REGS(set) \ - COND_PUSH(set, CLBR_EAX, eax); \ - COND_PUSH(set, CLBR_EDI, edi); \ - COND_PUSH(set, CLBR_ECX, ecx); \ - COND_PUSH(set, CLBR_EDX, edx) -#define PV_RESTORE_REGS(set) \ - COND_POP(set, CLBR_EDX, edx); \ - COND_POP(set, CLBR_ECX, ecx); \ - COND_POP(set, CLBR_EDI, edi); \ - COND_POP(set, CLBR_EAX, eax) - -#define PARA_PATCH(off) ((off) / 4) -#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .long, 4) -#define PARA_INDIRECT(addr) *%cs:addr -#endif - -#ifdef CONFIG_PARAVIRT_XXL -#define INTERRUPT_RETURN \ - PARA_SITE(PARA_PATCH(PV_CPU_iret), \ - ANNOTATE_RETPOLINE_SAFE; \ - jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);) - -#define DISABLE_INTERRUPTS(clobbers) \ - PARA_SITE(PARA_PATCH(PV_IRQ_irq_disable), \ - PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_ops+PV_IRQ_irq_disable); \ - PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) - -#define ENABLE_INTERRUPTS(clobbers) \ - PARA_SITE(PARA_PATCH(PV_IRQ_irq_enable), \ - PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_ops+PV_IRQ_irq_enable); \ - PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) -#endif +#else /* __ASSEMBLER__ */ #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT_XXL -/* - * If swapgs is used while the userspace stack is still current, - * there's no way to call a pvop. The PV replacement *must* be - * inlined, or the swapgs instruction must be trapped and emulated. - */ -#define SWAPGS_UNSAFE_STACK \ - PARA_SITE(PARA_PATCH(PV_CPU_swapgs), swapgs) - -/* - * Note: swapgs is very special, and in practise is either going to be - * implemented with a single "swapgs" instruction or something very - * special. Either way, we don't need to save any registers for - * it. 
- */ -#define SWAPGS \ - PARA_SITE(PARA_PATCH(PV_CPU_swapgs), \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_ops+PV_CPU_swapgs); \ - ) -#endif +#ifdef CONFIG_DEBUG_ENTRY -#define GET_CR2_INTO_RAX \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2); +#define PARA_INDIRECT(addr) *addr(%rip) -#ifdef CONFIG_PARAVIRT_XXL -#define USERGS_SYSRET64 \ - PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64), \ - ANNOTATE_RETPOLINE_SAFE; \ - jmp PARA_INDIRECT(pv_ops+PV_CPU_usergs_sysret64);) +.macro PARA_IRQ_save_fl + ANNOTATE_RETPOLINE_SAFE; + call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); +.endm -#ifdef CONFIG_DEBUG_ENTRY -#define SAVE_FLAGS(clobbers) \ - PARA_SITE(PARA_PATCH(PV_IRQ_save_fl), \ - PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); \ - PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) +#define SAVE_FLAGS ALTERNATIVE_2 "PARA_IRQ_save_fl;", \ + "ALT_CALL_INSTR;", ALT_CALL_ALWAYS, \ + "pushf; pop %rax;", ALT_NOT_XEN #endif -#endif - -#endif /* CONFIG_X86_32 */ +#endif /* CONFIG_PARAVIRT_XXL */ +#endif /* CONFIG_X86_64 */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #else /* CONFIG_PARAVIRT */ # define default_banner x86_init_noop + +#ifndef __ASSEMBLER__ +static inline void native_pv_lock_init(void) +{ +} +#endif #endif /* !CONFIG_PARAVIRT */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifndef CONFIG_PARAVIRT_XXL -static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline void paravirt_enter_mmap(struct mm_struct *mm) { } #endif @@ -950,5 +743,11 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) { } #endif -#endif /* __ASSEMBLY__ */ + +#ifndef CONFIG_PARAVIRT_SPINLOCKS +static inline void paravirt_set_cap(void) +{ +} +#endif +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PARAVIRT_H */ diff --git a/arch/x86/include/asm/paravirt_api_clock.h b/arch/x86/include/asm/paravirt_api_clock.h new file mode 100644 index 000000000000..65ac7cee0dad --- /dev/null +++ b/arch/x86/include/asm/paravirt_api_clock.h @@ -0,0 +1 @@ +#include <asm/paravirt.h> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 488c59686a73..3502939415ad 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -2,46 +2,12 @@ #ifndef _ASM_X86_PARAVIRT_TYPES_H #define _ASM_X86_PARAVIRT_TYPES_H -/* Bitmask of what can be clobbered: usually at least eax. */ -#define CLBR_NONE 0 -#define CLBR_EAX (1 << 0) -#define CLBR_ECX (1 << 1) -#define CLBR_EDX (1 << 2) -#define CLBR_EDI (1 << 3) +#ifdef CONFIG_PARAVIRT -#ifdef CONFIG_X86_32 -/* CLBR_ANY should match all regs platform has. 
For i386, that's just it */ -#define CLBR_ANY ((1 << 4) - 1) - -#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) -#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) -#define CLBR_SCRATCH (0) -#else -#define CLBR_RAX CLBR_EAX -#define CLBR_RCX CLBR_ECX -#define CLBR_RDX CLBR_EDX -#define CLBR_RDI CLBR_EDI -#define CLBR_RSI (1 << 4) -#define CLBR_R8 (1 << 5) -#define CLBR_R9 (1 << 6) -#define CLBR_R10 (1 << 7) -#define CLBR_R11 (1 << 8) - -#define CLBR_ANY ((1 << 9) - 1) - -#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ - CLBR_RCX | CLBR_R8 | CLBR_R9) -#define CLBR_RET_REG (CLBR_RAX) -#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) - -#endif /* X86_64 */ - -#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG) - -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ +#include <linux/types.h> #include <asm/desc_defs.h> -#include <asm/kmap_types.h> #include <asm/pgtable_types.h> #include <asm/nospec-branch.h> @@ -55,6 +21,7 @@ struct task_struct; struct cpumask; struct flush_tlb_info; struct mmu_gather; +struct vm_area_struct; /* * Wrapper type for pointers to code which uses the non-standard @@ -67,30 +34,12 @@ struct paravirt_callee_save { /* general info */ struct pv_info { #ifdef CONFIG_PARAVIRT_XXL - unsigned int kernel_rpl; - int shared_kernel_pmd; - -#ifdef CONFIG_X86_64 u16 extra_user_64bit_cs; /* __USER_CS if none */ #endif -#endif const char *name; }; -struct pv_init_ops { - /* - * Patch may replace one of the defined code sequences with - * arbitrary code, subject to the same register constraints. - * This generally means the code is not free to clobber any - * registers other than EAX. The patch function should return - * the number of bytes of code generated, as we nop pad the - * rest in generic code. - */ - unsigned (*patch)(u8 type, void *insnbuf, - unsigned long addr, unsigned len); -} __no_randomize_layout; - #ifdef CONFIG_PARAVIRT_XXL struct pv_lazy_ops { /* Set deferred update mode, used for batching operations. */ @@ -100,11 +49,6 @@ struct pv_lazy_ops { } __no_randomize_layout; #endif -struct pv_time_ops { - unsigned long long (*sched_clock)(void); - unsigned long long (*steal_clock)(int cpu); -} __no_randomize_layout; - struct pv_cpu_ops { /* hooks for various privileged instructions */ void (*io_delay)(void); @@ -118,11 +62,6 @@ struct pv_cpu_ops { void (*write_cr4)(unsigned long); -#ifdef CONFIG_X86_64 - unsigned long (*read_cr8)(void); - void (*write_cr8)(unsigned long); -#endif - /* Segment descriptor handling */ void (*load_tr_desc)(void); void (*load_gdt)(const struct desc_ptr *); @@ -130,9 +69,7 @@ struct pv_cpu_ops { void (*set_ldt)(const void *desc, unsigned entries); unsigned long (*store_tr)(void); void (*load_tls)(struct thread_struct *t, unsigned int cpu); -#ifdef CONFIG_X86_64 void (*load_gs_index)(unsigned int idx); -#endif void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, const void *desc); void (*write_gdt_entry)(struct desc_struct *, @@ -144,41 +81,28 @@ struct pv_cpu_ops { void (*load_sp0)(unsigned long sp0); - void (*set_iopl_mask)(unsigned mask); - - void (*wbinvd)(void); +#ifdef CONFIG_X86_IOPL_IOPERM + void (*invalidate_io_bitmap)(void); + void (*update_io_bitmap)(void); +#endif /* cpuid emulation, mostly so that caps bits can be disabled */ void (*cpuid)(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); /* Unsafe MSR operations. These will warn or panic on failure. 
*/ - u64 (*read_msr)(unsigned int msr); - void (*write_msr)(unsigned int msr, unsigned low, unsigned high); + u64 (*read_msr)(u32 msr); + void (*write_msr)(u32 msr, u64 val); /* * Safe MSR operations. - * read sets err to 0 or -EIO. write returns 0 or -EIO. + * Returns 0 or -EIO. */ - u64 (*read_msr_safe)(unsigned int msr, int *err); - int (*write_msr_safe)(unsigned int msr, unsigned low, unsigned high); + int (*read_msr_safe)(u32 msr, u64 *val); + int (*write_msr_safe)(u32 msr, u64 val); u64 (*read_pmc)(int counter); - /* - * Switch to usermode gs and return to 64-bit usermode using - * sysret. Only used in 64-bit kernels to return to 64-bit - * processes. Usermode register state, including %rsp, must - * already be restored. - */ - void (*usergs_sysret64)(void); - - /* Normal iret. Jump to this with the standard iret stack - frame set up. */ - void (*iret)(void); - - void (*swapgs)(void); - void (*start_context_switch)(struct task_struct *prev); void (*end_context_switch)(struct task_struct *next); #endif @@ -187,22 +111,18 @@ struct pv_cpu_ops { struct pv_irq_ops { #ifdef CONFIG_PARAVIRT_XXL /* - * Get/set interrupt state. save_fl and restore_fl are only - * expected to use X86_EFLAGS_IF; all other bits - * returned from save_fl are undefined, and may be ignored by - * restore_fl. + * Get/set interrupt state. save_fl is expected to use X86_EFLAGS_IF; + * all other bits returned from save_fl are undefined. * * NOTE: These functions callers expect the callee to preserve * more registers than the standard C calling convention. */ struct paravirt_callee_save save_fl; - struct paravirt_callee_save restore_fl; struct paravirt_callee_save irq_disable; struct paravirt_callee_save irq_enable; - +#endif void (*safe_halt)(void); void (*halt)(void); -#endif } __no_randomize_layout; struct pv_mmu_ops { @@ -210,26 +130,22 @@ struct pv_mmu_ops { void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); void (*flush_tlb_one_user)(unsigned long addr); - void (*flush_tlb_others)(const struct cpumask *cpus, - const struct flush_tlb_info *info); - - void (*tlb_remove_table)(struct mmu_gather *tlb, void *table); + void (*flush_tlb_multi)(const struct cpumask *cpus, + const struct flush_tlb_info *info); /* Hook for intercepting the destruction of an mm_struct. */ void (*exit_mmap)(struct mm_struct *mm); + void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc); #ifdef CONFIG_PARAVIRT_XXL - unsigned long (*read_cr2)(void); + struct paravirt_callee_save read_cr2; void (*write_cr2)(unsigned long); unsigned long (*read_cr3)(void); void (*write_cr3)(unsigned long); - /* Hooks for intercepting the creation/use of an mm_struct. */ - void (*activate_mm)(struct mm_struct *prev, - struct mm_struct *next); - void (*dup_mmap)(struct mm_struct *oldmm, - struct mm_struct *mm); + /* Hook for intercepting the creation/use of an mm_struct. 
*/ + void (*enter_mmap)(struct mm_struct *mm); /* Hooks for allocating and freeing a pagetable top-level */ int (*pgd_alloc)(struct mm_struct *mm); @@ -250,13 +166,11 @@ struct pv_mmu_ops { /* Pagetable manipulation functions */ void (*set_pte)(pte_t *ptep, pte_t pteval); - void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval); void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); - pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, + pte_t (*ptep_modify_prot_start)(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); - void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, + void (*ptep_modify_prot_commit)(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte); struct paravirt_callee_save pte_val; @@ -265,36 +179,20 @@ struct pv_mmu_ops { struct paravirt_callee_save pgd_val; struct paravirt_callee_save make_pgd; -#if CONFIG_PGTABLE_LEVELS >= 3 -#ifdef CONFIG_X86_PAE - void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); - void (*pte_clear)(struct mm_struct *mm, unsigned long addr, - pte_t *ptep); - void (*pmd_clear)(pmd_t *pmdp); - -#endif /* CONFIG_X86_PAE */ - void (*set_pud)(pud_t *pudp, pud_t pudval); struct paravirt_callee_save pmd_val; struct paravirt_callee_save make_pmd; -#if CONFIG_PGTABLE_LEVELS >= 4 struct paravirt_callee_save pud_val; struct paravirt_callee_save make_pud; void (*set_p4d)(p4d_t *p4dp, p4d_t p4dval); -#if CONFIG_PGTABLE_LEVELS >= 5 struct paravirt_callee_save p4d_val; struct paravirt_callee_save make_p4d; void (*set_pgd)(pgd_t *pgdp, pgd_t pgdval); -#endif /* CONFIG_PGTABLE_LEVELS >= 5 */ - -#endif /* CONFIG_PGTABLE_LEVELS >= 4 */ - -#endif /* CONFIG_PGTABLE_LEVELS >= 3 */ struct pv_lazy_ops lazy_mode; @@ -328,8 +226,6 @@ struct pv_lock_ops { * number for each function using the offset which we use to indicate * what to patch. */ struct paravirt_patch_template { - struct pv_init_ops init; - struct pv_time_ops time; struct pv_cpu_ops cpu; struct pv_irq_ops irq; struct pv_mmu_ops mmu; @@ -339,60 +235,22 @@ struct paravirt_patch_template { extern struct pv_info pv_info; extern struct paravirt_patch_template pv_ops; -#define PARAVIRT_PATCH(x) \ - (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) - -#define paravirt_type(op) \ - [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ - [paravirt_opptr] "i" (&(pv_ops.op)) -#define paravirt_clobber(clobber) \ - [paravirt_clobber] "i" (clobber) - -/* - * Generate some code, and mark it as patchable by the - * apply_paravirt() alternate instruction patcher. - */ -#define _paravirt_alt(insn_string, type, clobber) \ - "771:\n\t" insn_string "\n" "772:\n" \ - ".pushsection .parainstructions,\"a\"\n" \ - _ASM_ALIGN "\n" \ - _ASM_PTR " 771b\n" \ - " .byte " type "\n" \ - " .byte 772b-771b\n" \ - " .short " clobber "\n" \ - ".popsection\n" - -/* Generate patchable code, with the default asm parameters. */ -#define paravirt_alt(insn_string) \ - _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") - -/* Simple instruction patching code. 
*/ -#define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t" - -#define DEF_NATIVE(ops, name, code) \ - __visible extern const char start_##ops##_##name[], end_##ops##_##name[]; \ - asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name)) - -unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); -unsigned paravirt_patch_default(u8 type, void *insnbuf, - unsigned long addr, unsigned len); - -unsigned paravirt_patch_insns(void *insnbuf, unsigned len, - const char *start, const char *end); - -unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len); - -int paravirt_disable_iospace(void); +#define paravirt_ptr(op) [paravirt_opptr] "m" (pv_ops.op) /* * This generates an indirect call based on the operation type number. - * The type number, computed in PARAVIRT_PATCH, is derived from the - * offset into the paravirt_patch_template structure, and can therefore be - * freely converted back into a structure offset. + * + * Since alternatives run after enabling CET/IBT -- the latter setting/clearing + * capabilities and the former requiring all capabilities being finalized -- + * these indirect calls are subject to IBT and the paravirt stubs should have + * ENDBR on. + * + * OTOH since this is effectively a __nocfi indirect call, the paravirt stubs + * don't need to bother with CFI prefixes. */ #define PARAVIRT_CALL \ - ANNOTATE_RETPOLINE_SAFE \ - "call *%c[paravirt_opptr];" + ANNOTATE_RETPOLINE_SAFE "\n\t" \ + "call *%[paravirt_opptr];" /* * These macros are intended to wrap calls through one of the paravirt @@ -405,7 +263,7 @@ int paravirt_disable_iospace(void); * Unfortunately, this is a relatively slow operation for modern CPUs, * because it cannot necessarily determine what the destination * address is. In this case, the address is a runtime constant, so at - * the very least we can patch the call to e a simple direct call, or + * the very least we can patch the call to a simple direct call, or, * ideally, patch an inline implementation into the callsite. (Direct * calls are essentially free, because the call and return addresses * are completely predictable.) @@ -416,18 +274,12 @@ int paravirt_disable_iospace(void); * on the stack. All caller-save registers (eax,edx,ecx) are expected * to be modified (either clobbered or used for return values). * X86_64, on the other hand, already specifies a register-based calling - * conventions, returning at %rax, with parameteres going on %rdi, %rsi, + * conventions, returning at %rax, with parameters going in %rdi, %rsi, * %rdx, and %rcx. Note that for this reason, x86_64 does not need any * special handling for dealing with 4 arguments, unlike i386. - * However, x86_64 also have to clobber all caller saved registers, which + * However, x86_64 also has to clobber all caller saved registers, which * unfortunately, are quite a bit (r8 - r11) * - * The call instruction itself is marked by placing its start address - * and size into the .parainstructions section, so that - * apply_paravirt() in arch/i386/kernel/alternative.c can do the - * appropriate patching under the control of the backend pv_init_ops - * implementation. - * * Unfortunately there's no way to get gcc to generate the args setup * for the call, and then allow the call itself to be generated by an * inline asm. Because of this, we must do the complete arg setup and @@ -437,33 +289,31 @@ int paravirt_disable_iospace(void); * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. 
* It could be extended to more arguments, but there would be little * to be gained from that. For each number of arguments, there are - * the two VCALL and CALL variants for void and non-void functions. + * two VCALL and CALL variants for void and non-void functions. * * When there is a return value, the invoker of the macro must specify * the return type. The macro then uses sizeof() on that type to - * determine whether its a 32 or 64 bit value, and places the return + * determine whether it's a 32 or 64 bit value and places the return * in the right register(s) (just %eax for 32-bit, and %edx:%eax for - * 64-bit). For x86_64 machines, it just returns at %rax regardless of + * 64-bit). For x86_64 machines, it just returns in %rax regardless of * the return value size. * - * 64-bit arguments are passed as a pair of adjacent 32-bit arguments + * 64-bit arguments are passed as a pair of adjacent 32-bit arguments; * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments * in low,high order * * Small structures are passed and returned in registers. The macro * calling convention can't directly deal with this, so the wrapper - * functions must do this. + * functions must do it. * * These PVOP_* macros are only defined within this header. This * means that all uses must be wrapped in inline functions. This also * makes sure the incoming and outgoing types are always correct. */ #ifdef CONFIG_X86_32 -#define PVOP_VCALL_ARGS \ +#define PVOP_CALL_ARGS \ unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx; -#define PVOP_CALL_ARGS PVOP_VCALL_ARGS - #define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) #define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x)) #define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x)) @@ -479,12 +329,10 @@ int paravirt_disable_iospace(void); #define VEXTRA_CLOBBERS #else /* CONFIG_X86_64 */ /* [re]ax isn't an arg, but the return val */ -#define PVOP_VCALL_ARGS \ +#define PVOP_CALL_ARGS \ unsigned long __edi = __edi, __esi = __esi, \ __edx = __edx, __ecx = __ecx, __eax = __eax; -#define PVOP_CALL_ARGS PVOP_VCALL_ARGS - #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) #define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) #define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x)) @@ -495,8 +343,17 @@ int paravirt_disable_iospace(void); "=c" (__ecx) #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) -/* void functions are still allowed [re]ax for scratch */ +/* + * void functions are still allowed [re]ax for scratch. + * + * The ZERO_CALL_USED REGS feature may end up zeroing out callee-saved + * registers. Make sure we model this with the appropriate clobbers. + */ +#ifdef CONFIG_ZERO_CALL_USED_REGS +#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), PVOP_VCALL_CLOBBERS +#else #define PVOP_VCALLEE_CLOBBERS "=a" (__eax) +#endif #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS #define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" @@ -509,183 +366,165 @@ int paravirt_disable_iospace(void); #define PVOP_TEST_NULL(op) ((void)pv_ops.op) #endif -#define PVOP_RETMASK(rettype) \ +#define PVOP_RETVAL(rettype) \ ({ unsigned long __mask = ~0UL; \ + BUILD_BUG_ON(sizeof(rettype) > sizeof(unsigned long)); \ switch (sizeof(rettype)) { \ case 1: __mask = 0xffUL; break; \ case 2: __mask = 0xffffUL; break; \ case 4: __mask = 0xffffffffUL; break; \ default: break; \ } \ - __mask; \ + __mask & __eax; \ }) - -#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \ - pre, post, ...) 
\ +/* + * Use alternative patching for paravirt calls: + * - For replacing an indirect call with a direct one, use the "normal" + * ALTERNATIVE() macro with the indirect call as the initial code sequence, + * which will be replaced with the related direct call by using the + * ALT_FLAG_DIRECT_CALL special case and the "always on" feature. + * - In case the replacement is either a direct call or a short code sequence + * depending on a feature bit, the ALTERNATIVE_2() macro is being used. + * The indirect call is the initial code sequence again, while the special + * code sequence is selected with the specified feature bit. In case the + * feature is not active, the direct call is used as above via the + * ALT_FLAG_DIRECT_CALL special case and the "always on" feature. + */ +#define ____PVOP_CALL(ret, op, call_clbr, extra_clbr, ...) \ ({ \ - rettype __ret; \ PVOP_CALL_ARGS; \ PVOP_TEST_NULL(op); \ - /* This is 32-bit specific, but is okay in 64-bit */ \ - /* since this condition will never hold */ \ - if (sizeof(rettype) > sizeof(unsigned long)) { \ - asm volatile(pre \ - paravirt_alt(PARAVIRT_CALL) \ - post \ - : call_clbr, ASM_CALL_CONSTRAINT \ - : paravirt_type(op), \ - paravirt_clobber(clbr), \ - ##__VA_ARGS__ \ - : "memory", "cc" extra_clbr); \ - __ret = (rettype)((((u64)__edx) << 32) | __eax); \ - } else { \ - asm volatile(pre \ - paravirt_alt(PARAVIRT_CALL) \ - post \ - : call_clbr, ASM_CALL_CONSTRAINT \ - : paravirt_type(op), \ - paravirt_clobber(clbr), \ - ##__VA_ARGS__ \ - : "memory", "cc" extra_clbr); \ - __ret = (rettype)(__eax & PVOP_RETMASK(rettype)); \ - } \ - __ret; \ + asm volatile(ALTERNATIVE(PARAVIRT_CALL, ALT_CALL_INSTR, \ + ALT_CALL_ALWAYS) \ + : call_clbr, ASM_CALL_CONSTRAINT \ + : paravirt_ptr(op), \ + ##__VA_ARGS__ \ + : "memory", "cc" extra_clbr); \ + ret; \ }) -#define __PVOP_CALL(rettype, op, pre, post, ...) \ - ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \ - EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__) - -#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \ - ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ - PVOP_CALLEE_CLOBBERS, , \ - pre, post, ##__VA_ARGS__) - - -#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \ +#define ____PVOP_ALT_CALL(ret, op, alt, cond, call_clbr, \ + extra_clbr, ...) \ ({ \ - PVOP_VCALL_ARGS; \ + PVOP_CALL_ARGS; \ PVOP_TEST_NULL(op); \ - asm volatile(pre \ - paravirt_alt(PARAVIRT_CALL) \ - post \ + asm volatile(ALTERNATIVE_2(PARAVIRT_CALL, \ + ALT_CALL_INSTR, ALT_CALL_ALWAYS, \ + alt, cond) \ : call_clbr, ASM_CALL_CONSTRAINT \ - : paravirt_type(op), \ - paravirt_clobber(clbr), \ + : paravirt_ptr(op), \ ##__VA_ARGS__ \ : "memory", "cc" extra_clbr); \ + ret; \ }) -#define __PVOP_VCALL(op, pre, post, ...) \ - ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ - VEXTRA_CLOBBERS, \ - pre, post, ##__VA_ARGS__) +#define __PVOP_CALL(rettype, op, ...) \ + ____PVOP_CALL(PVOP_RETVAL(rettype), op, \ + PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, ##__VA_ARGS__) -#define __PVOP_VCALLEESAVE(op, pre, post, ...) \ - ____PVOP_VCALL(op.func, CLBR_RET_REG, \ - PVOP_VCALLEE_CLOBBERS, , \ - pre, post, ##__VA_ARGS__) +#define __PVOP_ALT_CALL(rettype, op, alt, cond, ...) \ + ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op, alt, cond, \ + PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, \ + ##__VA_ARGS__) +#define __PVOP_CALLEESAVE(rettype, op, ...) \ + ____PVOP_CALL(PVOP_RETVAL(rettype), op.func, \ + PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__) + +#define __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, ...) 
\ + ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op.func, alt, cond, \ + PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__) + + +#define __PVOP_VCALL(op, ...) \ + (void)____PVOP_CALL(, op, PVOP_VCALL_CLOBBERS, \ + VEXTRA_CLOBBERS, ##__VA_ARGS__) + +#define __PVOP_ALT_VCALL(op, alt, cond, ...) \ + (void)____PVOP_ALT_CALL(, op, alt, cond, \ + PVOP_VCALL_CLOBBERS, VEXTRA_CLOBBERS, \ + ##__VA_ARGS__) + +#define __PVOP_VCALLEESAVE(op, ...) \ + (void)____PVOP_CALL(, op.func, \ + PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__) + +#define __PVOP_ALT_VCALLEESAVE(op, alt, cond, ...) \ + (void)____PVOP_ALT_CALL(, op.func, alt, cond, \ + PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__) #define PVOP_CALL0(rettype, op) \ - __PVOP_CALL(rettype, op, "", "") + __PVOP_CALL(rettype, op) #define PVOP_VCALL0(op) \ - __PVOP_VCALL(op, "", "") + __PVOP_VCALL(op) +#define PVOP_ALT_CALL0(rettype, op, alt, cond) \ + __PVOP_ALT_CALL(rettype, op, alt, cond) +#define PVOP_ALT_VCALL0(op, alt, cond) \ + __PVOP_ALT_VCALL(op, alt, cond) #define PVOP_CALLEE0(rettype, op) \ - __PVOP_CALLEESAVE(rettype, op, "", "") + __PVOP_CALLEESAVE(rettype, op) #define PVOP_VCALLEE0(op) \ - __PVOP_VCALLEESAVE(op, "", "") + __PVOP_VCALLEESAVE(op) +#define PVOP_ALT_CALLEE0(rettype, op, alt, cond) \ + __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond) +#define PVOP_ALT_VCALLEE0(op, alt, cond) \ + __PVOP_ALT_VCALLEESAVE(op, alt, cond) #define PVOP_CALL1(rettype, op, arg1) \ - __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) + __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1)) #define PVOP_VCALL1(op, arg1) \ - __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1)) + __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1)) +#define PVOP_ALT_VCALL1(op, arg1, alt, cond) \ + __PVOP_ALT_VCALL(op, alt, cond, PVOP_CALL_ARG1(arg1)) #define PVOP_CALLEE1(rettype, op, arg1) \ - __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) + __PVOP_CALLEESAVE(rettype, op, PVOP_CALL_ARG1(arg1)) #define PVOP_VCALLEE1(op, arg1) \ - __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1)) + __PVOP_VCALLEESAVE(op, PVOP_CALL_ARG1(arg1)) +#define PVOP_ALT_CALLEE1(rettype, op, arg1, alt, cond) \ + __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, PVOP_CALL_ARG1(arg1)) +#define PVOP_ALT_VCALLEE1(op, arg1, alt, cond) \ + __PVOP_ALT_VCALLEESAVE(op, alt, cond, PVOP_CALL_ARG1(arg1)) #define PVOP_CALL2(rettype, op, arg1, arg2) \ - __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) + __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2)) #define PVOP_VCALL2(op, arg1, arg2) \ - __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) - -#define PVOP_CALLEE2(rettype, op, arg1, arg2) \ - __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) -#define PVOP_VCALLEE2(op, arg1, arg2) \ - __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) - + __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2)) #define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ - __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ + __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1), \ PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) #define PVOP_VCALL3(op, arg1, arg2, arg3) \ - __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ + __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), \ PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) -/* This is the only difference in x86_64. 
We can make it much simpler */ -#ifdef CONFIG_X86_32 #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ __PVOP_CALL(rettype, op, \ - "push %[_arg4];", "lea 4(%%esp),%%esp;", \ - PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ - PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4))) -#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ - __PVOP_VCALL(op, \ - "push %[_arg4];", "lea 4(%%esp),%%esp;", \ - "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) -#else -#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ - __PVOP_CALL(rettype, op, "", "", \ PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ - __PVOP_VCALL(op, "", "", \ - PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) -#endif - -/* Lazy mode for batching updates / context switch */ -enum paravirt_lazy_mode { - PARAVIRT_LAZY_NONE, - PARAVIRT_LAZY_MMU, - PARAVIRT_LAZY_CPU, -}; - -enum paravirt_lazy_mode paravirt_get_lazy_mode(void); -void paravirt_start_context_switch(struct task_struct *prev); -void paravirt_end_context_switch(struct task_struct *next); -void paravirt_enter_lazy_mmu(void); -void paravirt_leave_lazy_mmu(void); -void paravirt_flush_lazy_mmu(void); - -void _paravirt_nop(void); +unsigned long paravirt_ret0(void); +#ifdef CONFIG_PARAVIRT_XXL u64 _paravirt_ident_64(u64); +unsigned long pv_native_save_fl(void); +void pv_native_irq_disable(void); +void pv_native_irq_enable(void); +unsigned long pv_native_read_cr2(void); +#endif -#define paravirt_nop ((void *)_paravirt_nop) - -/* These all sit in the .parainstructions section to tell us what to patch. */ -struct paravirt_patch_site { - u8 *instr; /* original instructions */ - u8 instrtype; /* type of this instruction */ - u8 len; /* length of original instruction */ -}; +#define paravirt_nop ((void *)nop_func) -extern struct paravirt_patch_site __parainstructions[], - __parainstructions_end[]; +#endif /* __ASSEMBLER__ */ -#endif /* __ASSEMBLY__ */ +#define ALT_NOT_XEN ALT_NOT(X86_FEATURE_XENPV) +#endif /* CONFIG_PARAVIRT */ #endif /* _ASM_X86_PARAVIRT_TYPES_H */ diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h deleted file mode 100644 index 92015c65fa2a..000000000000 --- a/arch/x86/include/asm/pat.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_PAT_H -#define _ASM_X86_PAT_H - -#include <linux/types.h> -#include <asm/pgtable_types.h> - -bool pat_enabled(void); -void pat_disable(const char *reason); -extern void pat_init(void); -extern void init_cache_modes(void); - -extern int reserve_memtype(u64 start, u64 end, - enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); -extern int free_memtype(u64 start, u64 end); - -extern int kernel_map_sync_memtype(u64 base, unsigned long size, - enum page_cache_mode pcm); - -int io_reserve_memtype(resource_size_t start, resource_size_t end, - enum page_cache_mode *pcm); - -void io_free_memtype(resource_size_t start, resource_size_t end); - -bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); - -#endif /* _ASM_X86_PAT_H */ diff --git a/arch/x86/include/asm/pc-conf-reg.h b/arch/x86/include/asm/pc-conf-reg.h new file mode 100644 index 000000000000..56bceceacf5f --- /dev/null +++ b/arch/x86/include/asm/pc-conf-reg.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Support for the configuration register space at port I/O locations 
+ * 0x22 and 0x23 variously used by PC architectures, e.g. the MP Spec, + * Cyrix CPUs, numerous chipsets. + */ +#ifndef _ASM_X86_PC_CONF_REG_H +#define _ASM_X86_PC_CONF_REG_H + +#include <linux/io.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#define PC_CONF_INDEX 0x22 +#define PC_CONF_DATA 0x23 + +#define PC_CONF_MPS_IMCR 0x70 + +extern raw_spinlock_t pc_conf_lock; + +static inline u8 pc_conf_get(u8 reg) +{ + outb(reg, PC_CONF_INDEX); + return inb(PC_CONF_DATA); +} + +static inline void pc_conf_set(u8 reg, u8 data) +{ + outb(reg, PC_CONF_INDEX); + outb(data, PC_CONF_DATA); +} + +#endif /* _ASM_X86_PC_CONF_REG_H */ diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 662963681ea6..b3ab80a03365 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -7,11 +7,9 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/scatterlist.h> +#include <linux/numa.h> #include <asm/io.h> -#include <asm/pat.h> -#include <asm/x86_init.h> - -#ifdef __KERNEL__ +#include <asm/memtype.h> struct pci_sysdata { int domain; /* PCI domain */ @@ -22,11 +20,11 @@ struct pci_sysdata { #ifdef CONFIG_X86_64 void *iommu; /* IOMMU private data */ #endif -#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN +#ifdef CONFIG_PCI_MSI void *fwnode; /* IRQ domain for MSI assignment */ #endif #if IS_ENABLED(CONFIG_VMD) - bool vmd_domain; /* True if in Intel VMD domain */ + struct pci_dev *vmd_dev; /* VMD Device if in Intel VMD domain */ #endif }; @@ -34,14 +32,17 @@ extern int pci_routeirq; extern int noioapicquirk; extern int noioapicreroute; +static inline struct pci_sysdata *to_pci_sysdata(const struct pci_bus *bus) +{ + return bus->sysdata; +} + #ifdef CONFIG_PCI #ifdef CONFIG_PCI_DOMAINS static inline int pci_domain_nr(struct pci_bus *bus) { - struct pci_sysdata *sd = bus->sysdata; - - return sd->domain; + return to_pci_sysdata(bus)->domain; } static inline int pci_proc_domain(struct pci_bus *bus) @@ -50,27 +51,23 @@ static inline int pci_proc_domain(struct pci_bus *bus) } #endif -#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN +#ifdef CONFIG_PCI_MSI static inline void *_pci_root_bus_fwnode(struct pci_bus *bus) { - struct pci_sysdata *sd = bus->sysdata; - - return sd->fwnode; + return to_pci_sysdata(bus)->fwnode; } #define pci_root_bus_fwnode _pci_root_bus_fwnode #endif +#if IS_ENABLED(CONFIG_VMD) static inline bool is_vmd(struct pci_bus *bus) { -#if IS_ENABLED(CONFIG_VMD) - struct pci_sysdata *sd = bus->sysdata; - - return sd->vmd_domain; -#else - return false; -#endif + return to_pci_sysdata(bus)->vmd_dev != NULL; } +#else +#define is_vmd(bus) false +#endif /* CONFIG_VMD */ /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes @@ -94,6 +91,7 @@ void pcibios_scan_root(int bus); struct irq_routing_table *pcibios_get_irq_routing_table(void); int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); +bool pci_dev_has_default_msi_parent_domain(struct pci_dev *dev); #define HAVE_PCI_MMAP #define arch_can_pci_mmap_wc() pat_enabled() @@ -107,32 +105,11 @@ static inline void early_quirks(void) { } extern void pci_iommu_alloc(void); -#ifdef CONFIG_PCI_MSI -/* implemented in arch/x86/kernel/apic/io_apic. 
*/ -struct msi_desc; -int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); -void native_teardown_msi_irq(unsigned int irq); -void native_restore_msi_irqs(struct pci_dev *dev); -#else -#define native_setup_msi_irqs NULL -#define native_teardown_msi_irq NULL -#endif -#endif /* __KERNEL__ */ - -#ifdef CONFIG_X86_64 -#include <asm/pci_64.h> -#endif - -/* generic pci stuff */ -#include <asm-generic/pci.h> - #ifdef CONFIG_NUMA /* Returns the node based on pci bus */ static inline int __pcibus_to_node(const struct pci_bus *bus) { - const struct pci_sysdata *sd = bus->sysdata; - - return sd->node; + return to_pci_sysdata(bus)->node; } static inline const struct cpumask * @@ -141,21 +118,9 @@ cpumask_of_pcibus(const struct pci_bus *bus) int node; node = __pcibus_to_node(bus); - return (node == -1) ? cpu_online_mask : + return (node == NUMA_NO_NODE) ? cpu_online_mask : cpumask_of_node(node); } #endif -struct pci_setup_rom { - struct setup_data data; - uint16_t vendor; - uint16_t devid; - uint64_t pcilen; - unsigned long segment; - unsigned long bus; - unsigned long device; - unsigned long function; - uint8_t romdata[0]; -}; - #endif /* _ASM_X86_PCI_H */ diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h deleted file mode 100644 index f5411de0ae11..000000000000 --- a/arch/x86/include/asm/pci_64.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_PCI_64_H -#define _ASM_X86_PCI_64_H - -#ifdef __KERNEL__ - -#ifdef CONFIG_CALGARY_IOMMU -static inline void *pci_iommu(struct pci_bus *bus) -{ - struct pci_sysdata *sd = bus->sysdata; - return sd->iommu; -} - -static inline void set_pci_iommu(struct pci_bus *bus, void *val) -{ - struct pci_sysdata *sd = bus->sysdata; - sd->iommu = val; -} -#endif /* CONFIG_CALGARY_IOMMU */ - -extern int (*pci_config_read)(int seg, int bus, int dev, int fn, - int reg, int len, u32 *value); -extern int (*pci_config_write)(int seg, int bus, int dev, int fn, - int reg, int len, u32 value); - -#endif /* __KERNEL__ */ - -#endif /* _ASM_X86_PCI_64_H */ diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 73bb404f4d2a..70533fdcbf02 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -5,7 +5,10 @@ * (c) 1999 Martin Mares <mj@ucw.cz> */ +#include <linux/errno.h> +#include <linux/init.h> #include <linux/ioport.h> +#include <linux/spinlock.h> #undef DEBUG @@ -39,6 +42,8 @@ do { \ #define PCI_ROOT_NO_CRS 0x100000 #define PCI_NOASSIGN_BARS 0x200000 #define PCI_BIG_ROOT_WINDOW 0x400000 +#define PCI_USE_E820 0x800000 +#define PCI_NO_E820 0x1000000 extern unsigned int pci_probe; extern unsigned long pirq_table_addr; @@ -64,6 +69,8 @@ void pcibios_scan_specific_bus(int busn); /* pci-irq.c */ +struct pci_dev; + struct irq_info { u8 bus, devfn; /* Bus, device and function */ struct { @@ -87,7 +94,16 @@ struct irq_routing_table { u32 miniport_data; /* Crap */ u8 rfu[11]; u8 checksum; /* Modulo 256 checksum must give 0 */ - struct irq_info slots[0]; + struct irq_info slots[]; +} __attribute__((packed)); + +struct irt_routing_table { + u32 signature; /* IRT_SIGNATURE should be here */ + u8 size; /* Number of entries provided */ + u8 used; /* Number of entries actually used */ + u16 exclusive_irqs; /* IRQs devoted exclusively to + PCI usage */ + struct irq_info slots[]; } __attribute__((packed)); extern unsigned int pcibios_irq_mask; @@ -114,9 +130,20 @@ extern const struct pci_raw_ops pci_direct_conf1; extern bool port_cf9_safe; /* arch_initcall level */ 
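The pc_conf_get()/pc_conf_set() helpers added in pc-conf-reg.h above only issue the raw index/data port pair accesses; the two accesses are not atomic together, so callers are expected to serialize on the exported pc_conf_lock themselves. A minimal sketch of such a caller (illustrative only; the function name and the value written are assumptions, not taken from this diff):

	/*
	 * Illustrative only -- not part of this diff.  Any user of
	 * pc_conf_get()/pc_conf_set() must hold pc_conf_lock across
	 * the index/data access pair.
	 */
	static void example_write_imcr(u8 val)
	{
		unsigned long flags;

		raw_spin_lock_irqsave(&pc_conf_lock, flags);
		pc_conf_set(PC_CONF_MPS_IMCR, val);
		raw_spin_unlock_irqrestore(&pc_conf_lock, flags);
	}
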
+#ifdef CONFIG_PCI_DIRECT
 extern int pci_direct_probe(void);
 extern void pci_direct_init(int type);
+#else
+static inline int pci_direct_probe(void) { return -1; }
+static inline void pci_direct_init(int type) { }
+#endif
+
+#ifdef CONFIG_PCI_BIOS
 extern void pci_pcbios_init(void);
+#else
+static inline void pci_pcbios_init(void) { }
+#endif
+
 extern void __init dmi_check_pciprobe(void);
 extern void __init dmi_check_skip_isa_align(void);
@@ -221,3 +248,9 @@ static inline void mmio_config_writel(void __iomem *pos, u32 val)
 # define x86_default_pci_init_irq	NULL
 # define x86_default_pci_fixup_irqs	NULL
 #endif
+
+#if defined(CONFIG_PCI) && defined(CONFIG_ACPI)
+extern bool pci_use_e820;
+#else
+#define pci_use_e820 false
+#endif
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 1a19d11cfbbd..725d0eff7acd 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -3,550 +3,604 @@
 #define _ASM_X86_PERCPU_H
 
 #ifdef CONFIG_X86_64
-#define __percpu_seg		gs
-#define __percpu_mov_op		movq
+# define __percpu_seg		gs
+# define __percpu_rel		(%rip)
 #else
-#define __percpu_seg		fs
-#define __percpu_mov_op		movl
+# define __percpu_seg		fs
+# define __percpu_rel
 #endif
 
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
 
-/*
- * PER_CPU finds an address of a per-cpu variable.
- *
- * Args:
- *    var - variable name
- *    reg - 32bit register
- *
- * The resulting address is stored in the "reg" argument.
- *
- * Example:
- *    PER_CPU(cpu_gdt_descr, %ebx)
- */
 #ifdef CONFIG_SMP
-#define PER_CPU(var, reg)						\
-	__percpu_mov_op %__percpu_seg:this_cpu_off, reg;		\
-	lea var(reg), reg
-#define PER_CPU_VAR(var)	%__percpu_seg:var
-#else /* ! SMP */
-#define PER_CPU(var, reg)	__percpu_mov_op $var, reg
-#define PER_CPU_VAR(var)	var
-#endif	/* SMP */
-
-#ifdef CONFIG_X86_64_SMP
-#define INIT_PER_CPU_VAR(var)  init_per_cpu__##var
+# define __percpu		%__percpu_seg:
 #else
-#define INIT_PER_CPU_VAR(var)  var
+# define __percpu
 #endif
 
-#else /* ...!ASSEMBLY */
+#define PER_CPU_VAR(var)	__percpu(var)__percpu_rel
 
-#include <linux/kernel.h>
+#else /* !__ASSEMBLER__: */
+
+#include <linux/args.h>
+#include <linux/bits.h>
+#include <linux/build_bug.h>
 #include <linux/stringify.h>
+#include <asm/asm.h>
 
 #ifdef CONFIG_SMP
-#define __percpu_prefix		"%%"__stringify(__percpu_seg)":"
-#define __my_cpu_offset		this_cpu_read(this_cpu_off)
+
+#define __force_percpu_prefix	"%%"__stringify(__percpu_seg)":"
+
+#ifdef CONFIG_CC_HAS_NAMED_AS
+
+#ifdef __CHECKER__
+# define __seg_gs		__attribute__((address_space(__seg_gs)))
+# define __seg_fs		__attribute__((address_space(__seg_fs)))
+#endif
+
+#define __percpu_prefix
+#define __percpu_seg_override	CONCATENATE(__seg_, __percpu_seg)
+
+#else /* !CONFIG_CC_HAS_NAMED_AS: */
+
+#define __percpu_prefix		__force_percpu_prefix
+#define __percpu_seg_override
+
+#endif /* CONFIG_CC_HAS_NAMED_AS */
 
 /*
  * Compared to the generic __my_cpu_offset version, the following
  * saves one instruction and avoids clobbering a temp register.
  */
-#define arch_raw_cpu_ptr(ptr)				\
-({							\
-	unsigned long tcp_ptr__;			\
-	asm volatile("add " __percpu_arg(1) ", %0"	\
-		     : "=r" (tcp_ptr__)			\
-		     : "m" (this_cpu_off), "0" (ptr));	\
-	(typeof(*(ptr)) __kernel __force *)tcp_ptr__;	\
+#define __my_cpu_offset		this_cpu_read(this_cpu_off)
+
+/*
+ * arch_raw_cpu_ptr should not be used in 32-bit VDSO for a 64-bit
+ * kernel, because games are played with CONFIG_X86_64 there and
+ * sizeof(this_cpu_off) becomes 4. 
+ */ +#ifndef BUILD_VDSO32_64 +#define arch_raw_cpu_ptr(_ptr) \ +({ \ + unsigned long tcp_ptr__ = raw_cpu_read_long(this_cpu_off); \ + \ + tcp_ptr__ += (__force unsigned long)(_ptr); \ + (TYPEOF_UNQUAL(*(_ptr)) __force __kernel *)tcp_ptr__; \ }) #else -#define __percpu_prefix "" +#define arch_raw_cpu_ptr(_ptr) \ +({ \ + BUILD_BUG(); \ + (TYPEOF_UNQUAL(*(_ptr)) __force __kernel *)0; \ +}) #endif +#define PER_CPU_VAR(var) %__percpu_seg:(var)__percpu_rel + +#else /* !CONFIG_SMP: */ + +#define __force_percpu_prefix +#define __percpu_prefix +#define __percpu_seg_override + +#define PER_CPU_VAR(var) (var)__percpu_rel + +#endif /* CONFIG_SMP */ + +#if defined(CONFIG_USE_X86_SEG_SUPPORT) && defined(USE_TYPEOF_UNQUAL) +# define __my_cpu_type(var) typeof(var) +# define __my_cpu_ptr(ptr) (ptr) +# define __my_cpu_var(var) (var) + +# define __percpu_qual __percpu_seg_override +#else +# define __my_cpu_type(var) typeof(var) __percpu_seg_override +# define __my_cpu_ptr(ptr) (__my_cpu_type(*(ptr))*)(__force uintptr_t)(ptr) +# define __my_cpu_var(var) (*__my_cpu_ptr(&(var))) +#endif + +#define __force_percpu_arg(x) __force_percpu_prefix "%" #x #define __percpu_arg(x) __percpu_prefix "%" #x /* - * Initialized pointers to per-cpu variables needed for the boot - * processor need to use these macros to get the proper address - * offset from __per_cpu_load on SMP. - * - * There also must be an entry in vmlinux_64.lds.S + * For arch-specific code, we can use direct single-insn ops (they + * don't give an lvalue though). */ -#define DECLARE_INIT_PER_CPU(var) \ - extern typeof(var) init_per_cpu_var(var) -#ifdef CONFIG_X86_64_SMP -#define init_per_cpu_var(var) init_per_cpu__##var -#else -#define init_per_cpu_var(var) var -#endif +#define __pcpu_type_1 u8 +#define __pcpu_type_2 u16 +#define __pcpu_type_4 u32 +#define __pcpu_type_8 u64 -/* For arch-specific code, we can use direct single-insn ops (they - * don't give an lvalue though). 
*/
-extern void __bad_percpu_size(void);
-
-#define percpu_to_op(op, var, val)			\
-do {							\
-	typedef typeof(var) pto_T__;			\
-	if (0) {					\
-		pto_T__ pto_tmp__;			\
-		pto_tmp__ = (val);			\
-		(void)pto_tmp__;			\
-	}						\
-	switch (sizeof(var)) {				\
-	case 1:						\
-		asm(op "b %1,"__percpu_arg(0)		\
-		    : "+m" (var)			\
-		    : "qi" ((pto_T__)(val)));		\
-		break;					\
-	case 2:						\
-		asm(op "w %1,"__percpu_arg(0)		\
-		    : "+m" (var)			\
-		    : "ri" ((pto_T__)(val)));		\
-		break;					\
-	case 4:						\
-		asm(op "l %1,"__percpu_arg(0)		\
-		    : "+m" (var)			\
-		    : "ri" ((pto_T__)(val)));		\
-		break;					\
-	case 8:						\
-		asm(op "q %1,"__percpu_arg(0)		\
-		    : "+m" (var)			\
-		    : "re" ((pto_T__)(val)));		\
-		break;					\
-	default: __bad_percpu_size();			\
-	}						\
+#define __pcpu_cast_1(val)	((u8)(((unsigned long) val) & 0xff))
+#define __pcpu_cast_2(val)	((u16)(((unsigned long) val) & 0xffff))
+#define __pcpu_cast_4(val)	((u32)(((unsigned long) val) & 0xffffffff))
+#define __pcpu_cast_8(val)	((u64)(val))
+
+#define __pcpu_op_1(op)		op "b "
+#define __pcpu_op_2(op)		op "w "
+#define __pcpu_op_4(op)		op "l "
+#define __pcpu_op_8(op)		op "q "
+
+#define __pcpu_reg_1(mod, x)	mod "q" (x)
+#define __pcpu_reg_2(mod, x)	mod "r" (x)
+#define __pcpu_reg_4(mod, x)	mod "r" (x)
+#define __pcpu_reg_8(mod, x)	mod "r" (x)
+
+#define __pcpu_reg_imm_1(x)	"qi" (x)
+#define __pcpu_reg_imm_2(x)	"ri" (x)
+#define __pcpu_reg_imm_4(x)	"ri" (x)
+#define __pcpu_reg_imm_8(x)	"re" (x)
+
+#ifdef CONFIG_USE_X86_SEG_SUPPORT
+
+#define __raw_cpu_read(size, qual, pcp)					\
+({									\
+	*(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp));		\
+})
+
+#define __raw_cpu_write(size, qual, pcp, val)				\
+do {									\
+	*(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp)) = (val);	\
+} while (0)
+
+#define __raw_cpu_read_const(pcp)	__raw_cpu_read(, , pcp)
+
+#else /* !CONFIG_USE_X86_SEG_SUPPORT: */
+
+#define __raw_cpu_read(size, qual, _var)				\
+({									\
+	__pcpu_type_##size pfo_val__;					\
+									\
+	asm qual (__pcpu_op_##size("mov")				\
+		  __percpu_arg([var]) ", %[val]"			\
+	    : [val] __pcpu_reg_##size("=", pfo_val__)			\
+	    : [var] "m" (__my_cpu_var(_var)));				\
+									\
+	(typeof(_var))(unsigned long) pfo_val__;			\
+})
+
+#define __raw_cpu_write(size, qual, _var, _val)				\
+do {									\
+	__pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val);	\
+									\
+	if (0) {							\
+		TYPEOF_UNQUAL(_var) pto_tmp__;				\
+		pto_tmp__ = (_val);					\
+		(void)pto_tmp__;					\
+	}								\
+	asm qual (__pcpu_op_##size("mov") "%[val], "			\
+		  __percpu_arg([var])					\
+	    : [var] "=m" (__my_cpu_var(_var))				\
+	    : [val] __pcpu_reg_imm_##size(pto_val__));			\
 } while (0)
 
 /*
- * Generate a percpu add to memory instruction and optimize code
+ * The generic per-CPU infrastructure is not suitable for
+ * reading const-qualified variables. 
+ */ +#define __raw_cpu_read_const(pcp) ({ BUILD_BUG(); (typeof(pcp))0; }) + +#endif /* CONFIG_USE_X86_SEG_SUPPORT */ + +#define __raw_cpu_read_stable(size, _var) \ +({ \ + __pcpu_type_##size pfo_val__; \ + \ + asm(__pcpu_op_##size("mov") \ + __force_percpu_arg(a[var]) ", %[val]" \ + : [val] __pcpu_reg_##size("=", pfo_val__) \ + : [var] "i" (&(_var))); \ + \ + (typeof(_var))(unsigned long) pfo_val__; \ +}) + +#define percpu_unary_op(size, qual, op, _var) \ +({ \ + asm qual (__pcpu_op_##size(op) __percpu_arg([var]) \ + : [var] "+m" (__my_cpu_var(_var))); \ +}) + +#define percpu_binary_op(size, qual, op, _var, _val) \ +do { \ + __pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val); \ + \ + if (0) { \ + TYPEOF_UNQUAL(_var) pto_tmp__; \ + pto_tmp__ = (_val); \ + (void)pto_tmp__; \ + } \ + asm qual (__pcpu_op_##size(op) "%[val], " __percpu_arg([var]) \ + : [var] "+m" (__my_cpu_var(_var)) \ + : [val] __pcpu_reg_imm_##size(pto_val__)); \ +} while (0) + +/* + * Generate a per-CPU add to memory instruction and optimize code * if one is added or subtracted. */ -#define percpu_add_op(var, val) \ +#define percpu_add_op(size, qual, var, val) \ do { \ - typedef typeof(var) pao_T__; \ - const int pao_ID__ = (__builtin_constant_p(val) && \ - ((val) == 1 || (val) == -1)) ? \ - (int)(val) : 0; \ + const int pao_ID__ = \ + (__builtin_constant_p(val) && \ + ((val) == 1 || \ + (val) == (typeof(val))-1)) ? (int)(val) : 0; \ + \ if (0) { \ - pao_T__ pao_tmp__; \ + TYPEOF_UNQUAL(var) pao_tmp__; \ pao_tmp__ = (val); \ (void)pao_tmp__; \ } \ - switch (sizeof(var)) { \ - case 1: \ - if (pao_ID__ == 1) \ - asm("incb "__percpu_arg(0) : "+m" (var)); \ - else if (pao_ID__ == -1) \ - asm("decb "__percpu_arg(0) : "+m" (var)); \ - else \ - asm("addb %1, "__percpu_arg(0) \ - : "+m" (var) \ - : "qi" ((pao_T__)(val))); \ - break; \ - case 2: \ - if (pao_ID__ == 1) \ - asm("incw "__percpu_arg(0) : "+m" (var)); \ - else if (pao_ID__ == -1) \ - asm("decw "__percpu_arg(0) : "+m" (var)); \ - else \ - asm("addw %1, "__percpu_arg(0) \ - : "+m" (var) \ - : "ri" ((pao_T__)(val))); \ - break; \ - case 4: \ - if (pao_ID__ == 1) \ - asm("incl "__percpu_arg(0) : "+m" (var)); \ - else if (pao_ID__ == -1) \ - asm("decl "__percpu_arg(0) : "+m" (var)); \ - else \ - asm("addl %1, "__percpu_arg(0) \ - : "+m" (var) \ - : "ri" ((pao_T__)(val))); \ - break; \ - case 8: \ - if (pao_ID__ == 1) \ - asm("incq "__percpu_arg(0) : "+m" (var)); \ - else if (pao_ID__ == -1) \ - asm("decq "__percpu_arg(0) : "+m" (var)); \ - else \ - asm("addq %1, "__percpu_arg(0) \ - : "+m" (var) \ - : "re" ((pao_T__)(val))); \ - break; \ - default: __bad_percpu_size(); \ - } \ + if (pao_ID__ == 1) \ + percpu_unary_op(size, qual, "inc", var); \ + else if (pao_ID__ == -1) \ + percpu_unary_op(size, qual, "dec", var); \ + else \ + percpu_binary_op(size, qual, "add", var, val); \ } while (0) -#define percpu_from_op(op, var) \ -({ \ - typeof(var) pfo_ret__; \ - switch (sizeof(var)) { \ - case 1: \ - asm volatile(op "b "__percpu_arg(1)",%0"\ - : "=q" (pfo_ret__) \ - : "m" (var)); \ - break; \ - case 2: \ - asm volatile(op "w "__percpu_arg(1)",%0"\ - : "=r" (pfo_ret__) \ - : "m" (var)); \ - break; \ - case 4: \ - asm volatile(op "l "__percpu_arg(1)",%0"\ - : "=r" (pfo_ret__) \ - : "m" (var)); \ - break; \ - case 8: \ - asm volatile(op "q "__percpu_arg(1)",%0"\ - : "=r" (pfo_ret__) \ - : "m" (var)); \ - break; \ - default: __bad_percpu_size(); \ - } \ - pfo_ret__; \ -}) - -#define percpu_stable_op(op, var) \ -({ \ - typeof(var) pfo_ret__; \ - switch (sizeof(var)) { \ - case 1: \ 
- asm(op "b "__percpu_arg(P1)",%0" \ - : "=q" (pfo_ret__) \ - : "p" (&(var))); \ - break; \ - case 2: \ - asm(op "w "__percpu_arg(P1)",%0" \ - : "=r" (pfo_ret__) \ - : "p" (&(var))); \ - break; \ - case 4: \ - asm(op "l "__percpu_arg(P1)",%0" \ - : "=r" (pfo_ret__) \ - : "p" (&(var))); \ - break; \ - case 8: \ - asm(op "q "__percpu_arg(P1)",%0" \ - : "=r" (pfo_ret__) \ - : "p" (&(var))); \ - break; \ - default: __bad_percpu_size(); \ - } \ - pfo_ret__; \ -}) - -#define percpu_unary_op(op, var) \ -({ \ - switch (sizeof(var)) { \ - case 1: \ - asm(op "b "__percpu_arg(0) \ - : "+m" (var)); \ - break; \ - case 2: \ - asm(op "w "__percpu_arg(0) \ - : "+m" (var)); \ - break; \ - case 4: \ - asm(op "l "__percpu_arg(0) \ - : "+m" (var)); \ - break; \ - case 8: \ - asm(op "q "__percpu_arg(0) \ - : "+m" (var)); \ - break; \ - default: __bad_percpu_size(); \ - } \ -}) - /* * Add return operation */ -#define percpu_add_return_op(var, val) \ +#define percpu_add_return_op(size, qual, _var, _val) \ ({ \ - typeof(var) paro_ret__ = val; \ - switch (sizeof(var)) { \ - case 1: \ - asm("xaddb %0, "__percpu_arg(1) \ - : "+q" (paro_ret__), "+m" (var) \ - : : "memory"); \ - break; \ - case 2: \ - asm("xaddw %0, "__percpu_arg(1) \ - : "+r" (paro_ret__), "+m" (var) \ - : : "memory"); \ - break; \ - case 4: \ - asm("xaddl %0, "__percpu_arg(1) \ - : "+r" (paro_ret__), "+m" (var) \ - : : "memory"); \ - break; \ - case 8: \ - asm("xaddq %0, "__percpu_arg(1) \ - : "+re" (paro_ret__), "+m" (var) \ - : : "memory"); \ - break; \ - default: __bad_percpu_size(); \ - } \ - paro_ret__ += val; \ - paro_ret__; \ + __pcpu_type_##size paro_tmp__ = __pcpu_cast_##size(_val); \ + \ + asm qual (__pcpu_op_##size("xadd") "%[tmp], " \ + __percpu_arg([var]) \ + : [tmp] __pcpu_reg_##size("+", paro_tmp__), \ + [var] "+m" (__my_cpu_var(_var)) \ + : : "memory"); \ + (typeof(_var))(unsigned long) (paro_tmp__ + _val); \ }) /* - * xchg is implemented using cmpxchg without a lock prefix. xchg is - * expensive due to the implied lock prefix. The processor cannot prefetch - * cachelines if xchg is used. + * raw_cpu_xchg() can use a load-store since + * it is not required to be IRQ-safe. */ -#define percpu_xchg_op(var, nval) \ +#define raw_percpu_xchg_op(_var, _nval) \ ({ \ - typeof(var) pxo_ret__; \ - typeof(var) pxo_new__ = (nval); \ - switch (sizeof(var)) { \ - case 1: \ - asm("\n\tmov "__percpu_arg(1)",%%al" \ - "\n1:\tcmpxchgb %2, "__percpu_arg(1) \ - "\n\tjnz 1b" \ - : "=&a" (pxo_ret__), "+m" (var) \ - : "q" (pxo_new__) \ - : "memory"); \ - break; \ - case 2: \ - asm("\n\tmov "__percpu_arg(1)",%%ax" \ - "\n1:\tcmpxchgw %2, "__percpu_arg(1) \ - "\n\tjnz 1b" \ - : "=&a" (pxo_ret__), "+m" (var) \ - : "r" (pxo_new__) \ - : "memory"); \ - break; \ - case 4: \ - asm("\n\tmov "__percpu_arg(1)",%%eax" \ - "\n1:\tcmpxchgl %2, "__percpu_arg(1) \ - "\n\tjnz 1b" \ - : "=&a" (pxo_ret__), "+m" (var) \ - : "r" (pxo_new__) \ - : "memory"); \ - break; \ - case 8: \ - asm("\n\tmov "__percpu_arg(1)",%%rax" \ - "\n1:\tcmpxchgq %2, "__percpu_arg(1) \ - "\n\tjnz 1b" \ - : "=&a" (pxo_ret__), "+m" (var) \ - : "r" (pxo_new__) \ - : "memory"); \ - break; \ - default: __bad_percpu_size(); \ - } \ - pxo_ret__; \ + TYPEOF_UNQUAL(_var) pxo_old__ = raw_cpu_read(_var); \ + \ + raw_cpu_write(_var, _nval); \ + \ + pxo_old__; \ }) /* - * cmpxchg has no such implied lock semantics as a result it is much - * more efficient for cpu local operations. + * this_cpu_xchg() is implemented using CMPXCHG without a LOCK prefix. + * XCHG is expensive due to the implied LOCK prefix. 
The processor + * cannot prefetch cachelines if XCHG is used. */ -#define percpu_cmpxchg_op(var, oval, nval) \ +#define this_percpu_xchg_op(_var, _nval) \ ({ \ - typeof(var) pco_ret__; \ - typeof(var) pco_old__ = (oval); \ - typeof(var) pco_new__ = (nval); \ - switch (sizeof(var)) { \ - case 1: \ - asm("cmpxchgb %2, "__percpu_arg(1) \ - : "=a" (pco_ret__), "+m" (var) \ - : "q" (pco_new__), "0" (pco_old__) \ - : "memory"); \ - break; \ - case 2: \ - asm("cmpxchgw %2, "__percpu_arg(1) \ - : "=a" (pco_ret__), "+m" (var) \ - : "r" (pco_new__), "0" (pco_old__) \ - : "memory"); \ - break; \ - case 4: \ - asm("cmpxchgl %2, "__percpu_arg(1) \ - : "=a" (pco_ret__), "+m" (var) \ - : "r" (pco_new__), "0" (pco_old__) \ - : "memory"); \ - break; \ - case 8: \ - asm("cmpxchgq %2, "__percpu_arg(1) \ - : "=a" (pco_ret__), "+m" (var) \ - : "r" (pco_new__), "0" (pco_old__) \ - : "memory"); \ - break; \ - default: __bad_percpu_size(); \ - } \ - pco_ret__; \ + TYPEOF_UNQUAL(_var) pxo_old__ = this_cpu_read(_var); \ + \ + do { } while (!this_cpu_try_cmpxchg(_var, &pxo_old__, _nval)); \ + \ + pxo_old__; \ }) /* - * this_cpu_read() makes gcc load the percpu variable every time it is - * accessed while this_cpu_read_stable() allows the value to be cached. - * this_cpu_read_stable() is more efficient and can be used if its value - * is guaranteed to be valid across cpus. The current users include - * get_current() and get_thread_info() both of which are actually - * per-thread variables implemented as per-cpu variables and thus - * stable for the duration of the respective task. + * CMPXCHG has no such implied lock semantics as a result it is much + * more efficient for CPU-local operations. */ -#define this_cpu_read_stable(var) percpu_stable_op("mov", var) - -#define raw_cpu_read_1(pcp) percpu_from_op("mov", pcp) -#define raw_cpu_read_2(pcp) percpu_from_op("mov", pcp) -#define raw_cpu_read_4(pcp) percpu_from_op("mov", pcp) - -#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) -#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) -#define raw_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) -#define raw_cpu_add_1(pcp, val) percpu_add_op((pcp), val) -#define raw_cpu_add_2(pcp, val) percpu_add_op((pcp), val) -#define raw_cpu_add_4(pcp, val) percpu_add_op((pcp), val) -#define raw_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) -#define raw_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) -#define raw_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) -#define raw_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) -#define raw_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) -#define raw_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) -#define raw_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val) -#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val) -#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val) - -#define this_cpu_read_1(pcp) percpu_from_op("mov", pcp) -#define this_cpu_read_2(pcp) percpu_from_op("mov", pcp) -#define this_cpu_read_4(pcp) percpu_from_op("mov", pcp) -#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) -#define this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) -#define this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) -#define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) -#define this_cpu_and_2(pcp, val) percpu_to_op("and", 
(pcp), val) -#define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) -#define this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) -#define this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) -#define this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) -#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval) -#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) -#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) - -#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) -#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) -#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) -#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) - -#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) - -#ifdef CONFIG_X86_CMPXCHG64 -#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \ +#define percpu_cmpxchg_op(size, qual, _var, _oval, _nval) \ ({ \ - bool __ret; \ - typeof(pcp1) __o1 = (o1), __n1 = (n1); \ - typeof(pcp2) __o2 = (o2), __n2 = (n2); \ - asm volatile("cmpxchg8b "__percpu_arg(1) \ - CC_SET(z) \ - : CC_OUT(z) (__ret), "+m" (pcp1), "+m" (pcp2), "+a" (__o1), "+d" (__o2) \ - : "b" (__n1), "c" (__n2)); \ - __ret; \ + __pcpu_type_##size pco_old__ = __pcpu_cast_##size(_oval); \ + __pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval); \ + \ + asm qual (__pcpu_op_##size("cmpxchg") "%[nval], " \ + __percpu_arg([var]) \ + : [oval] "+a" (pco_old__), \ + [var] "+m" (__my_cpu_var(_var)) \ + : [nval] __pcpu_reg_##size(, pco_new__) \ + : "memory"); \ + \ + (typeof(_var))(unsigned long) pco_old__; \ }) -#define raw_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double -#define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double -#endif /* CONFIG_X86_CMPXCHG64 */ +#define percpu_try_cmpxchg_op(size, qual, _var, _ovalp, _nval) \ +({ \ + bool success; \ + __pcpu_type_##size *pco_oval__ = (__pcpu_type_##size *)(_ovalp); \ + __pcpu_type_##size pco_old__ = *pco_oval__; \ + __pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval); \ + \ + asm qual (__pcpu_op_##size("cmpxchg") "%[nval], " \ + __percpu_arg([var]) \ + : "=@ccz" (success), \ + [oval] "+a" (pco_old__), \ + [var] "+m" (__my_cpu_var(_var)) \ + : [nval] __pcpu_reg_##size(, pco_new__) \ + : "memory"); \ + if (unlikely(!success)) \ + *pco_oval__ = pco_old__; \ + \ + likely(success); \ +}) + +#if defined(CONFIG_X86_32) && !defined(CONFIG_UML) + +#define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval) \ +({ \ + union { \ + u64 var; \ + struct { \ + u32 low, high; \ + }; \ + } old__, new__; \ + \ + old__.var = _oval; \ + new__.var = _nval; \ + \ + asm_inline qual ( \ + ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \ + "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \ + : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)), \ + "+a" (old__.low), "+d" (old__.high)) \ + : "b" (new__.low), "c" (new__.high), \ + "S" (&(_var)) \ + : "memory"); \ + \ + old__.var; \ +}) + +#define raw_cpu_cmpxchg64(pcp, oval, 
nval) percpu_cmpxchg64_op(8, , pcp, oval, nval) +#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, volatile, pcp, oval, nval) + +#define percpu_try_cmpxchg64_op(size, qual, _var, _ovalp, _nval) \ +({ \ + bool success; \ + u64 *_oval = (u64 *)(_ovalp); \ + union { \ + u64 var; \ + struct { \ + u32 low, high; \ + }; \ + } old__, new__; \ + \ + old__.var = *_oval; \ + new__.var = _nval; \ + \ + asm_inline qual ( \ + ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \ + "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \ + : ALT_OUTPUT_SP("=@ccz" (success), \ + [var] "+m" (__my_cpu_var(_var)), \ + "+a" (old__.low), "+d" (old__.high)) \ + : "b" (new__.low), "c" (new__.high), \ + "S" (&(_var)) \ + : "memory"); \ + if (unlikely(!success)) \ + *_oval = old__.var; \ + \ + likely(success); \ +}) + +#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg64_op(8, , pcp, ovalp, nval) +#define this_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg64_op(8, volatile, pcp, ovalp, nval) + +#endif /* defined(CONFIG_X86_32) && !defined(CONFIG_UML) */ -/* - * Per cpu atomic 64 bit operations are only available under 64 bit. - * 32 bit must fall back to generic operations. - */ #ifdef CONFIG_X86_64 -#define raw_cpu_read_8(pcp) percpu_from_op("mov", pcp) -#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) -#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val) -#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) -#define raw_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) -#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) -#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) -#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) - -#define this_cpu_read_8(pcp) percpu_from_op("mov", pcp) -#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) -#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) -#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) -#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) -#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) +#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval); +#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval); -/* - * Pretty complex macro to generate cmpxchg16 instruction. The instruction - * is not supported on early AMD64 processors so we must be able to emulate - * it in software. The address used in the cmpxchg16 instruction must be - * aligned to a 16 byte boundary. 
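
The 32-bit cmpxchg64 helpers lean on the CMPXCHG8B register convention:
comparand in EDX:EAX, replacement in ECX:EBX, which is exactly what the
low/high union splits out (little-endian layout assumed). A rough C
model of the instruction's semantics, names hypothetical:

	union u64_halves {
		u64 whole;
		struct { u32 low, high; };	/* low -> EAX/EBX, high -> EDX/ECX */
	};

	/* Sketch only: CMPXCHG8B semantics; ZF becomes the return value. */
	static bool cmpxchg8b_model(u64 *mem, union u64_halves *old,
				    union u64_halves newval)
	{
		if (*mem == old->whole) {
			*mem = newval.whole;
			return true;
		}
		old->whole = *mem;	/* failure updates EDX:EAX */
		return false;
	}
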
- */ -#define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2) \ +#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, , pcp, ovalp, nval); +#define this_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval); + +#define percpu_cmpxchg128_op(size, qual, _var, _oval, _nval) \ ({ \ - bool __ret; \ - typeof(pcp1) __o1 = (o1), __n1 = (n1); \ - typeof(pcp2) __o2 = (o2), __n2 = (n2); \ - alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \ - "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \ - X86_FEATURE_CX16, \ - ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \ - "+m" (pcp2), "+d" (__o2)), \ - "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \ - __ret; \ + union { \ + u128 var; \ + struct { \ + u64 low, high; \ + }; \ + } old__, new__; \ + \ + old__.var = _oval; \ + new__.var = _nval; \ + \ + asm_inline qual ( \ + ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \ + "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \ + : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)), \ + "+a" (old__.low), "+d" (old__.high)) \ + : "b" (new__.low), "c" (new__.high), \ + "S" (&(_var)) \ + : "memory"); \ + \ + old__.var; \ }) -#define raw_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double -#define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double +#define raw_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, , pcp, oval, nval) +#define this_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, volatile, pcp, oval, nval) -#endif +#define percpu_try_cmpxchg128_op(size, qual, _var, _ovalp, _nval) \ +({ \ + bool success; \ + u128 *_oval = (u128 *)(_ovalp); \ + union { \ + u128 var; \ + struct { \ + u64 low, high; \ + }; \ + } old__, new__; \ + \ + old__.var = *_oval; \ + new__.var = _nval; \ + \ + asm_inline qual ( \ + ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \ + "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \ + : ALT_OUTPUT_SP("=@ccz" (success), \ + [var] "+m" (__my_cpu_var(_var)), \ + "+a" (old__.low), "+d" (old__.high)) \ + : "b" (new__.low), "c" (new__.high), \ + "S" (&(_var)) \ + : "memory"); \ + if (unlikely(!success)) \ + *_oval = old__.var; \ + \ + likely(success); \ +}) -static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr, - const unsigned long __percpu *addr) -{ - unsigned long __percpu *a = - (unsigned long __percpu *)addr + nr / BITS_PER_LONG; +#define raw_cpu_try_cmpxchg128(pcp, ovalp, nval) percpu_try_cmpxchg128_op(16, , pcp, ovalp, nval) +#define this_cpu_try_cmpxchg128(pcp, ovalp, nval) percpu_try_cmpxchg128_op(16, volatile, pcp, ovalp, nval) + +#endif /* CONFIG_X86_64 */ + +#define raw_cpu_read_1(pcp) __raw_cpu_read(1, , pcp) +#define raw_cpu_read_2(pcp) __raw_cpu_read(2, , pcp) +#define raw_cpu_read_4(pcp) __raw_cpu_read(4, , pcp) +#define raw_cpu_write_1(pcp, val) __raw_cpu_write(1, , pcp, val) +#define raw_cpu_write_2(pcp, val) __raw_cpu_write(2, , pcp, val) +#define raw_cpu_write_4(pcp, val) __raw_cpu_write(4, , pcp, val) + +#define this_cpu_read_1(pcp) __raw_cpu_read(1, volatile, pcp) +#define this_cpu_read_2(pcp) __raw_cpu_read(2, volatile, pcp) +#define this_cpu_read_4(pcp) __raw_cpu_read(4, volatile, pcp) +#define this_cpu_write_1(pcp, val) __raw_cpu_write(1, volatile, pcp, val) +#define this_cpu_write_2(pcp, val) __raw_cpu_write(2, volatile, pcp, val) +#define this_cpu_write_4(pcp, val) __raw_cpu_write(4, volatile, pcp, val) + +#define this_cpu_read_stable_1(pcp) __raw_cpu_read_stable(1, pcp) +#define this_cpu_read_stable_2(pcp) __raw_cpu_read_stable(2, pcp) +#define 
this_cpu_read_stable_4(pcp) __raw_cpu_read_stable(4, pcp) + +#define raw_cpu_add_1(pcp, val) percpu_add_op(1, , (pcp), val) +#define raw_cpu_add_2(pcp, val) percpu_add_op(2, , (pcp), val) +#define raw_cpu_add_4(pcp, val) percpu_add_op(4, , (pcp), val) +#define raw_cpu_and_1(pcp, val) percpu_binary_op(1, , "and", (pcp), val) +#define raw_cpu_and_2(pcp, val) percpu_binary_op(2, , "and", (pcp), val) +#define raw_cpu_and_4(pcp, val) percpu_binary_op(4, , "and", (pcp), val) +#define raw_cpu_or_1(pcp, val) percpu_binary_op(1, , "or", (pcp), val) +#define raw_cpu_or_2(pcp, val) percpu_binary_op(2, , "or", (pcp), val) +#define raw_cpu_or_4(pcp, val) percpu_binary_op(4, , "or", (pcp), val) +#define raw_cpu_xchg_1(pcp, val) raw_percpu_xchg_op(pcp, val) +#define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val) +#define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val) + +#define this_cpu_add_1(pcp, val) percpu_add_op(1, volatile, (pcp), val) +#define this_cpu_add_2(pcp, val) percpu_add_op(2, volatile, (pcp), val) +#define this_cpu_add_4(pcp, val) percpu_add_op(4, volatile, (pcp), val) +#define this_cpu_and_1(pcp, val) percpu_binary_op(1, volatile, "and", (pcp), val) +#define this_cpu_and_2(pcp, val) percpu_binary_op(2, volatile, "and", (pcp), val) +#define this_cpu_and_4(pcp, val) percpu_binary_op(4, volatile, "and", (pcp), val) +#define this_cpu_or_1(pcp, val) percpu_binary_op(1, volatile, "or", (pcp), val) +#define this_cpu_or_2(pcp, val) percpu_binary_op(2, volatile, "or", (pcp), val) +#define this_cpu_or_4(pcp, val) percpu_binary_op(4, volatile, "or", (pcp), val) +#define this_cpu_xchg_1(pcp, nval) this_percpu_xchg_op(pcp, nval) +#define this_cpu_xchg_2(pcp, nval) this_percpu_xchg_op(pcp, nval) +#define this_cpu_xchg_4(pcp, nval) this_percpu_xchg_op(pcp, nval) + +#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, val) +#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, val) +#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(4, , pcp, val) +#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, , pcp, oval, nval) +#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, , pcp, oval, nval) +#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, , pcp, oval, nval) +#define raw_cpu_try_cmpxchg_1(pcp, ovalp, nval) percpu_try_cmpxchg_op(1, , pcp, ovalp, nval) +#define raw_cpu_try_cmpxchg_2(pcp, ovalp, nval) percpu_try_cmpxchg_op(2, , pcp, ovalp, nval) +#define raw_cpu_try_cmpxchg_4(pcp, ovalp, nval) percpu_try_cmpxchg_op(4, , pcp, ovalp, nval) + +#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(1, volatile, pcp, val) +#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(2, volatile, pcp, val) +#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(4, volatile, pcp, val) +#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, volatile, pcp, oval, nval) +#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, volatile, pcp, oval, nval) +#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, volatile, pcp, oval, nval) +#define this_cpu_try_cmpxchg_1(pcp, ovalp, nval) percpu_try_cmpxchg_op(1, volatile, pcp, ovalp, nval) +#define this_cpu_try_cmpxchg_2(pcp, ovalp, nval) percpu_try_cmpxchg_op(2, volatile, pcp, ovalp, nval) +#define this_cpu_try_cmpxchg_4(pcp, ovalp, nval) percpu_try_cmpxchg_op(4, volatile, pcp, ovalp, nval) +/* + * Per-CPU atomic 64-bit operations are only available under 64-bit kernels. + * 32-bit kernels must fall back to generic operations. 
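
Every _1/_2/_4 accessor above gets picked by sizeof() in the generic
layer; roughly what include/linux/percpu-defs.h does with
__pcpu_size_call(), as a simplified sketch (the real macro also issues
preemption sanity checks; the sketch's name is made up):

	#define pcpu_size_call_sketch(stem, variable, ...)		\
	do {								\
		switch (sizeof(variable)) {				\
		case 1: stem##1(variable, __VA_ARGS__); break;		\
		case 2: stem##2(variable, __VA_ARGS__); break;		\
		case 4: stem##4(variable, __VA_ARGS__); break;		\
		case 8: stem##8(variable, __VA_ARGS__); break;		\
		default: __bad_size_call_parameter();			\
		}							\
	} while (0)
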
+ */ #ifdef CONFIG_X86_64 - return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0; -#else - return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0; -#endif -} -static inline bool x86_this_cpu_variable_test_bit(int nr, - const unsigned long __percpu *addr) -{ - bool oldbit; +#define raw_cpu_read_8(pcp) __raw_cpu_read(8, , pcp) +#define raw_cpu_write_8(pcp, val) __raw_cpu_write(8, , pcp, val) + +#define this_cpu_read_8(pcp) __raw_cpu_read(8, volatile, pcp) +#define this_cpu_write_8(pcp, val) __raw_cpu_write(8, volatile, pcp, val) + +#define this_cpu_read_stable_8(pcp) __raw_cpu_read_stable(8, pcp) + +#define raw_cpu_add_8(pcp, val) percpu_add_op(8, , (pcp), val) +#define raw_cpu_and_8(pcp, val) percpu_binary_op(8, , "and", (pcp), val) +#define raw_cpu_or_8(pcp, val) percpu_binary_op(8, , "or", (pcp), val) +#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(8, , pcp, val) +#define raw_cpu_xchg_8(pcp, nval) raw_percpu_xchg_op(pcp, nval) +#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval) +#define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, , pcp, ovalp, nval) + +#define this_cpu_add_8(pcp, val) percpu_add_op(8, volatile, (pcp), val) +#define this_cpu_and_8(pcp, val) percpu_binary_op(8, volatile, "and", (pcp), val) +#define this_cpu_or_8(pcp, val) percpu_binary_op(8, volatile, "or", (pcp), val) +#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val) +#define this_cpu_xchg_8(pcp, nval) this_percpu_xchg_op(pcp, nval) +#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval) +#define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval) + +#define raw_cpu_read_long(pcp) raw_cpu_read_8(pcp) + +#else /* !CONFIG_X86_64: */ - asm volatile("btl "__percpu_arg(2)",%1" - CC_SET(c) - : CC_OUT(c) (oldbit) - : "m" (*(unsigned long __percpu *)addr), "Ir" (nr)); +/* There is no generic 64-bit read stable operation for 32-bit targets. */ +#define this_cpu_read_stable_8(pcp) ({ BUILD_BUG(); (typeof(pcp))0; }) - return oldbit; -} +#define raw_cpu_read_long(pcp) raw_cpu_read_4(pcp) -#define x86_this_cpu_test_bit(nr, addr) \ - (__builtin_constant_p((nr)) \ - ? x86_this_cpu_constant_test_bit((nr), (addr)) \ - : x86_this_cpu_variable_test_bit((nr), (addr))) +#endif /* CONFIG_X86_64 */ + +#define this_cpu_read_const(pcp) __raw_cpu_read_const(pcp) + +/* + * this_cpu_read() makes the compiler load the per-CPU variable every time + * it is accessed while this_cpu_read_stable() allows the value to be cached. + * this_cpu_read_stable() is more efficient and can be used if its value + * is guaranteed to be valid across CPUs. The current users include + * current_task and cpu_current_top_of_stack, both of which are + * actually per-thread variables implemented as per-CPU variables and + * thus stable for the duration of the respective task. + */ +#define this_cpu_read_stable(pcp) __pcpu_size_call_return(this_cpu_read_stable_, pcp) + +#define x86_this_cpu_constant_test_bit(_nr, _var) \ +({ \ + unsigned long __percpu *addr__ = \ + (unsigned long __percpu *)&(_var) + BIT_WORD(_nr); \ + \ + !!(BIT_MASK(_nr) & raw_cpu_read(*addr__)); \ +}) + +#define x86_this_cpu_variable_test_bit(_nr, _var) \ +({ \ + bool oldbit; \ + \ + asm volatile("btl %[nr], " __percpu_arg([var]) \ + : "=@ccc" (oldbit) \ + : [var] "m" (__my_cpu_var(_var)), \ + [nr] "rI" (_nr)); \ + oldbit; \ +}) + +#define x86_this_cpu_test_bit(_nr, _var) \ + (__builtin_constant_p(_nr) \ + ? 
x86_this_cpu_constant_test_bit(_nr, _var) \ + : x86_this_cpu_variable_test_bit(_nr, _var)) #include <asm-generic/percpu.h> /* We can use this directly for local CPU (faster). */ -DECLARE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off); +DECLARE_PER_CPU_CACHE_HOT(unsigned long, this_cpu_off); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #ifdef CONFIG_SMP @@ -568,46 +622,47 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off); { [0 ... NR_CPUS-1] = _initvalue }; \ __typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map -#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ +#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ EXPORT_PER_CPU_SYMBOL(_name) -#define DECLARE_EARLY_PER_CPU(_type, _name) \ - DECLARE_PER_CPU(_type, _name); \ - extern __typeof__(_type) *_name##_early_ptr; \ +#define DECLARE_EARLY_PER_CPU(_type, _name) \ + DECLARE_PER_CPU(_type, _name); \ + extern __typeof__(_type) *_name##_early_ptr; \ extern __typeof__(_type) _name##_early_map[] -#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \ - DECLARE_PER_CPU_READ_MOSTLY(_type, _name); \ - extern __typeof__(_type) *_name##_early_ptr; \ +#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \ + DECLARE_PER_CPU_READ_MOSTLY(_type, _name); \ + extern __typeof__(_type) *_name##_early_ptr; \ extern __typeof__(_type) _name##_early_map[] -#define early_per_cpu_ptr(_name) (_name##_early_ptr) -#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) -#define early_per_cpu(_name, _cpu) \ - *(early_per_cpu_ptr(_name) ? \ - &early_per_cpu_ptr(_name)[_cpu] : \ +#define early_per_cpu_ptr(_name) (_name##_early_ptr) +#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) + +#define early_per_cpu(_name, _cpu) \ + *(early_per_cpu_ptr(_name) ? \ + &early_per_cpu_ptr(_name)[_cpu] : \ &per_cpu(_name, _cpu)) -#else /* !CONFIG_SMP */ -#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ +#else /* !CONFIG_SMP: */ +#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ DEFINE_PER_CPU(_type, _name) = _initvalue #define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue) \ DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue -#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ +#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ EXPORT_PER_CPU_SYMBOL(_name) -#define DECLARE_EARLY_PER_CPU(_type, _name) \ +#define DECLARE_EARLY_PER_CPU(_type, _name) \ DECLARE_PER_CPU(_type, _name) -#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \ +#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \ DECLARE_PER_CPU_READ_MOSTLY(_type, _name) -#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu) -#define early_per_cpu_ptr(_name) NULL +#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu) +#define early_per_cpu_ptr(_name) NULL /* no early_per_cpu_map() */ -#endif /* !CONFIG_SMP */ +#endif /* !CONFIG_SMP */ #endif /* _ASM_X86_PERCPU_H */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8bdf74902293..7276ba70c88a 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -2,12 +2,14 @@ #ifndef _ASM_X86_PERF_EVENT_H #define _ASM_X86_PERF_EVENT_H +#include <linux/static_call.h> + /* * Performance event hw details: */ #define INTEL_PMC_MAX_GENERIC 32 -#define INTEL_PMC_MAX_FIXED 3 +#define INTEL_PMC_MAX_FIXED 16 #define INTEL_PMC_IDX_FIXED 32 #define X86_PMC_IDX_MAX 64 @@ -29,9 +31,29 @@ #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL +#define 
ARCH_PERFMON_EVENTSEL_BR_CNTR (1ULL << 35) +#define ARCH_PERFMON_EVENTSEL_EQ (1ULL << 36) +#define ARCH_PERFMON_EVENTSEL_UMASK2 (0xFFULL << 40) + +#define INTEL_FIXED_BITS_STRIDE 4 +#define INTEL_FIXED_0_KERNEL (1ULL << 0) +#define INTEL_FIXED_0_USER (1ULL << 1) +#define INTEL_FIXED_0_ANYTHREAD (1ULL << 2) +#define INTEL_FIXED_0_ENABLE_PMI (1ULL << 3) +#define INTEL_FIXED_3_METRICS_CLEAR (1ULL << 2) #define HSW_IN_TX (1ULL << 32) #define HSW_IN_TX_CHECKPOINTED (1ULL << 33) +#define ICL_EVENTSEL_ADAPTIVE (1ULL << 34) +#define ICL_FIXED_0_ADAPTIVE (1ULL << 32) + +#define INTEL_FIXED_BITS_MASK \ + (INTEL_FIXED_0_KERNEL | INTEL_FIXED_0_USER | \ + INTEL_FIXED_0_ANYTHREAD | INTEL_FIXED_0_ENABLE_PMI | \ + ICL_FIXED_0_ADAPTIVE) + +#define intel_fixed_bits_by_idx(_idx, _bits) \ + ((_bits) << ((_idx) * INTEL_FIXED_BITS_STRIDE)) #define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) #define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) @@ -48,11 +70,22 @@ #define AMD64_L3_SLICE_SHIFT 48 #define AMD64_L3_SLICE_MASK \ - ((0xFULL) << AMD64_L3_SLICE_SHIFT) + (0xFULL << AMD64_L3_SLICE_SHIFT) +#define AMD64_L3_SLICEID_MASK \ + (0x7ULL << AMD64_L3_SLICE_SHIFT) #define AMD64_L3_THREAD_SHIFT 56 #define AMD64_L3_THREAD_MASK \ - ((0xFFULL) << AMD64_L3_THREAD_SHIFT) + (0xFFULL << AMD64_L3_THREAD_SHIFT) +#define AMD64_L3_F19H_THREAD_MASK \ + (0x3ULL << AMD64_L3_THREAD_SHIFT) + +#define AMD64_L3_EN_ALL_CORES BIT_ULL(47) +#define AMD64_L3_EN_ALL_SLICES BIT_ULL(46) + +#define AMD64_L3_COREID_SHIFT 42 +#define AMD64_L3_COREID_MASK \ + (0x7ULL << AMD64_L3_COREID_SHIFT) #define X86_RAW_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_EVENT | \ @@ -74,6 +107,26 @@ #define AMD64_RAW_EVENT_MASK_NB \ (AMD64_EVENTSEL_EVENT | \ ARCH_PERFMON_EVENTSEL_UMASK) + +#define AMD64_PERFMON_V2_EVENTSEL_EVENT_NB \ + (AMD64_EVENTSEL_EVENT | \ + GENMASK_ULL(37, 36)) + +#define AMD64_PERFMON_V2_EVENTSEL_UMASK_NB \ + (ARCH_PERFMON_EVENTSEL_UMASK | \ + GENMASK_ULL(27, 24)) + +#define AMD64_PERFMON_V2_RAW_EVENT_MASK_NB \ + (AMD64_PERFMON_V2_EVENTSEL_EVENT_NB | \ + AMD64_PERFMON_V2_EVENTSEL_UMASK_NB) + +#define AMD64_PERFMON_V2_ENABLE_UMC BIT_ULL(31) +#define AMD64_PERFMON_V2_EVENTSEL_EVENT_UMC GENMASK_ULL(7, 0) +#define AMD64_PERFMON_V2_EVENTSEL_RDWRMASK_UMC GENMASK_ULL(9, 8) +#define AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC \ + (AMD64_PERFMON_V2_EVENTSEL_EVENT_UMC | \ + AMD64_PERFMON_V2_EVENTSEL_RDWRMASK_UMC) + #define AMD64_NUM_COUNTERS 4 #define AMD64_NUM_COUNTERS_CORE 6 #define AMD64_NUM_COUNTERS_NB 4 @@ -87,6 +140,21 @@ #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 #define ARCH_PERFMON_EVENTS_COUNT 7 +#define PEBS_DATACFG_MEMINFO BIT_ULL(0) +#define PEBS_DATACFG_GP BIT_ULL(1) +#define PEBS_DATACFG_XMMS BIT_ULL(2) +#define PEBS_DATACFG_LBRS BIT_ULL(3) +#define PEBS_DATACFG_CNTR BIT_ULL(4) +#define PEBS_DATACFG_METRICS BIT_ULL(5) +#define PEBS_DATACFG_LBR_SHIFT 24 +#define PEBS_DATACFG_CNTR_SHIFT 32 +#define PEBS_DATACFG_CNTR_MASK GENMASK_ULL(15, 0) +#define PEBS_DATACFG_FIX_SHIFT 48 +#define PEBS_DATACFG_FIX_MASK GENMASK_ULL(7, 0) + +/* Steal the highest bit of pebs_data_cfg for SW usage */ +#define PEBS_UPDATE_DS_SW BIT_ULL(63) + /* * Intel "Architectural Performance Monitoring" CPUID * detection/enumeration details: @@ -118,11 +186,112 @@ union cpuid10_edx { struct { unsigned int num_counters_fixed:5; unsigned int bit_width_fixed:8; - unsigned int reserved:19; + unsigned int reserved1:2; + unsigned int anythread_deprecated:1; + unsigned int reserved2:16; } split; unsigned int full; }; +/* + * Intel "Architectural Performance Monitoring extension" CPUID + * 
detection/enumeration details: + */ +#define ARCH_PERFMON_EXT_LEAF 0x00000023 +#define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1 +#define ARCH_PERFMON_ACR_LEAF 0x2 +#define ARCH_PERFMON_PEBS_CAP_LEAF 0x4 +#define ARCH_PERFMON_PEBS_COUNTER_LEAF 0x5 + +union cpuid35_eax { + struct { + unsigned int leaf0:1; + /* Counters Sub-Leaf */ + unsigned int cntr_subleaf:1; + /* Auto Counter Reload Sub-Leaf */ + unsigned int acr_subleaf:1; + /* Events Sub-Leaf */ + unsigned int events_subleaf:1; + /* arch-PEBS Sub-Leaves */ + unsigned int pebs_caps_subleaf:1; + unsigned int pebs_cnts_subleaf:1; + unsigned int reserved:26; + } split; + unsigned int full; +}; + +union cpuid35_ebx { + struct { + /* UnitMask2 Supported */ + unsigned int umask2:1; + /* EQ-bit Supported */ + unsigned int eq:1; + unsigned int reserved:30; + } split; + unsigned int full; +}; + +/* + * Intel Architectural LBR CPUID detection/enumeration details: + */ +union cpuid28_eax { + struct { + /* Supported LBR depth values */ + unsigned int lbr_depth_mask:8; + unsigned int reserved:22; + /* Deep C-state Reset */ + unsigned int lbr_deep_c_reset:1; + /* IP values contain LIP */ + unsigned int lbr_lip:1; + } split; + unsigned int full; +}; + +union cpuid28_ebx { + struct { + /* CPL Filtering Supported */ + unsigned int lbr_cpl:1; + /* Branch Filtering Supported */ + unsigned int lbr_filter:1; + /* Call-stack Mode Supported */ + unsigned int lbr_call_stack:1; + } split; + unsigned int full; +}; + +union cpuid28_ecx { + struct { + /* Mispredict Bit Supported */ + unsigned int lbr_mispred:1; + /* Timed LBRs Supported */ + unsigned int lbr_timed_lbr:1; + /* Branch Type Field Supported */ + unsigned int lbr_br_type:1; + unsigned int reserved:13; + /* Branch counters (Event Logging) Supported */ + unsigned int lbr_counters:4; + } split; + unsigned int full; +}; + +/* + * AMD "Extended Performance Monitoring and Debug" CPUID + * detection/enumeration details: + */ +union cpuid_0x80000022_ebx { + struct { + /* Number of Core Performance Counters */ + unsigned int num_core_pmc:4; + /* Number of available LBR Stack Entries */ + unsigned int lbr_v2_stack_sz:6; + /* Number of Data Fabric Counters */ + unsigned int num_df_pmc:6; + /* Number of Unified Memory Controller Counters */ + unsigned int num_umc_pmc:6; + } split; + unsigned int full; +}; + struct x86_pmu_capability { int version; int num_counters_gp; @@ -131,19 +300,36 @@ struct x86_pmu_capability { int bit_width_fixed; unsigned int events_mask; int events_mask_len; + unsigned int pebs_ept :1; }; /* * Fixed-purpose performance events: */ +/* RDPMC offset for Fixed PMCs */ +#define INTEL_PMC_FIXED_RDPMC_BASE (1 << 30) +#define INTEL_PMC_FIXED_RDPMC_METRICS (1 << 29) + /* - * All 3 fixed-mode PMCs are configured via this single MSR: + * All the fixed-mode PMCs are configured via this single MSR: */ #define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d /* - * The counts are available in three separate MSRs: + * There is no event-code assigned to the fixed-mode PMCs. + * + * For a fixed-mode PMC, which has an equivalent event on a general-purpose + * PMC, the event-code of the equivalent event is used for the fixed-mode PMC, + * e.g., Instr_Retired.Any and CPU_CLK_Unhalted.Core. + * + * For a fixed-mode PMC, which doesn't have an equivalent event, a + * pseudo-encoding is used, e.g., CPU_CLK_Unhalted.Ref and TOPDOWN.SLOTS. + * The pseudo event-code for a fixed-mode PMC must be 0x00. + * The pseudo umask-code is 0xX. 
The X equals the index of the fixed + * counter + 1, e.g., the fixed counter 2 has the pseudo-encoding 0x0300. + * + * The counts are available in separate MSRs: */ /* Instr_Retired.Any: */ @@ -154,27 +340,280 @@ struct x86_pmu_capability { #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a #define INTEL_PMC_IDX_FIXED_CPU_CYCLES (INTEL_PMC_IDX_FIXED + 1) -/* CPU_CLK_Unhalted.Ref: */ +/* CPU_CLK_Unhalted.Ref: event=0x00,umask=0x3 (pseudo-encoding) */ #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define INTEL_PMC_IDX_FIXED_REF_CYCLES (INTEL_PMC_IDX_FIXED + 2) #define INTEL_PMC_MSK_FIXED_REF_CYCLES (1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES) +/* TOPDOWN.SLOTS: event=0x00,umask=0x4 (pseudo-encoding) */ +#define MSR_ARCH_PERFMON_FIXED_CTR3 0x30c +#define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3) +#define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS) + +/* TOPDOWN_BAD_SPECULATION.ALL: fixed counter 4 (Atom only) */ +/* TOPDOWN_FE_BOUND.ALL: fixed counter 5 (Atom only) */ +/* TOPDOWN_RETIRING.ALL: fixed counter 6 (Atom only) */ + +static inline bool use_fixed_pseudo_encoding(u64 code) +{ + return !(code & 0xff); +} + /* * We model BTS tracing as another fixed-mode PMC. * - * We choose a value in the middle of the fixed event range, since lower + * We choose the value 47 for the fixed index of BTS, since lower * values are used by actual fixed events and higher values are used * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. */ -#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 16) +#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 15) -#define GLOBAL_STATUS_COND_CHG BIT_ULL(63) -#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(62) -#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61) -#define GLOBAL_STATUS_ASIF BIT_ULL(60) -#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59) -#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58) -#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55) +/* + * The PERF_METRICS MSR is modeled as several magic fixed-mode PMCs, one for + * each TopDown metric event. + * + * Internally the TopDown metric events are mapped to the FxCtr 3 (SLOTS). + */ +#define INTEL_PMC_IDX_METRIC_BASE (INTEL_PMC_IDX_FIXED + 16) +#define INTEL_PMC_IDX_TD_RETIRING (INTEL_PMC_IDX_METRIC_BASE + 0) +#define INTEL_PMC_IDX_TD_BAD_SPEC (INTEL_PMC_IDX_METRIC_BASE + 1) +#define INTEL_PMC_IDX_TD_FE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 2) +#define INTEL_PMC_IDX_TD_BE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 3) +#define INTEL_PMC_IDX_TD_HEAVY_OPS (INTEL_PMC_IDX_METRIC_BASE + 4) +#define INTEL_PMC_IDX_TD_BR_MISPREDICT (INTEL_PMC_IDX_METRIC_BASE + 5) +#define INTEL_PMC_IDX_TD_FETCH_LAT (INTEL_PMC_IDX_METRIC_BASE + 6) +#define INTEL_PMC_IDX_TD_MEM_BOUND (INTEL_PMC_IDX_METRIC_BASE + 7) +#define INTEL_PMC_IDX_METRIC_END INTEL_PMC_IDX_TD_MEM_BOUND +#define INTEL_PMC_MSK_TOPDOWN ((0xffull << INTEL_PMC_IDX_METRIC_BASE) | \ + INTEL_PMC_MSK_FIXED_SLOTS) + +/* + * There is no event-code assigned to the TopDown events. + * + * For the slots event, use the pseudo code of the fixed counter 3. + * + * For the metric events, the pseudo event-code is 0x00. + * The pseudo umask-code starts from the middle of the pseudo event + * space, 0x80. 
+ */ +#define INTEL_TD_SLOTS 0x0400 /* TOPDOWN.SLOTS */ +/* Level 1 metrics */ +#define INTEL_TD_METRIC_RETIRING 0x8000 /* Retiring metric */ +#define INTEL_TD_METRIC_BAD_SPEC 0x8100 /* Bad speculation metric */ +#define INTEL_TD_METRIC_FE_BOUND 0x8200 /* FE bound metric */ +#define INTEL_TD_METRIC_BE_BOUND 0x8300 /* BE bound metric */ +/* Level 2 metrics */ +#define INTEL_TD_METRIC_HEAVY_OPS 0x8400 /* Heavy Operations metric */ +#define INTEL_TD_METRIC_BR_MISPREDICT 0x8500 /* Branch Mispredict metric */ +#define INTEL_TD_METRIC_FETCH_LAT 0x8600 /* Fetch Latency metric */ +#define INTEL_TD_METRIC_MEM_BOUND 0x8700 /* Memory bound metric */ + +#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_MEM_BOUND +#define INTEL_TD_METRIC_NUM 8 + +#define INTEL_TD_CFG_METRIC_CLEAR_BIT 0 +#define INTEL_TD_CFG_METRIC_CLEAR BIT_ULL(INTEL_TD_CFG_METRIC_CLEAR_BIT) + +static inline bool is_metric_idx(int idx) +{ + return (unsigned)(idx - INTEL_PMC_IDX_METRIC_BASE) < INTEL_TD_METRIC_NUM; +} + +static inline bool is_topdown_idx(int idx) +{ + return is_metric_idx(idx) || idx == INTEL_PMC_IDX_FIXED_SLOTS; +} + +#define INTEL_PMC_OTHER_TOPDOWN_BITS(bit) \ + (~(0x1ull << bit) & INTEL_PMC_MSK_TOPDOWN) + +#define GLOBAL_STATUS_COND_CHG BIT_ULL(63) +#define GLOBAL_STATUS_BUFFER_OVF_BIT 62 +#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(GLOBAL_STATUS_BUFFER_OVF_BIT) +#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61) +#define GLOBAL_STATUS_ASIF BIT_ULL(60) +#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59) +#define GLOBAL_STATUS_LBRS_FROZEN_BIT 58 +#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT) +#define GLOBAL_STATUS_TRACE_TOPAPMI_BIT 55 +#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(GLOBAL_STATUS_TRACE_TOPAPMI_BIT) +#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT 54 +#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD BIT_ULL(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT) +#define GLOBAL_STATUS_PERF_METRICS_OVF_BIT 48 + +#define GLOBAL_CTRL_EN_PERF_METRICS BIT_ULL(48) +/* + * We model guest LBR event tracing as another fixed-mode PMC like BTS. + * + * We choose bit 58 because it's used to indicate LBR stack frozen state + * for architectural perfmon v4, also we unconditionally mask that bit in + * the handle_pmi_common(), so it'll never be set in the overflow handling. + * + * With this fake counter assigned, the guest LBR event user (such as KVM), + * can program the LBR registers on its own, and we don't actually do anything + * with then in the host context. + */ +#define INTEL_PMC_IDX_FIXED_VLBR (GLOBAL_STATUS_LBRS_FROZEN_BIT) + +/* + * Pseudo-encoding the guest LBR event as event=0x00,umask=0x1b, + * since it would claim bit 58 which is effectively Fixed26. 
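
is_metric_idx() above uses the unsigned-subtraction idiom to fold both
bounds checks into a single compare; spelled out as a sketch:

	/* (unsigned)(idx - base) < num  <=>  base <= idx && idx < base + num */
	static bool in_metric_range_sketch(int idx)
	{
		return (unsigned int)(idx - INTEL_PMC_IDX_METRIC_BASE) <
		       INTEL_TD_METRIC_NUM;
	}
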
+ */ +#define INTEL_FIXED_VLBR_EVENT 0x1b00 + +/* + * Adaptive PEBS v4 + */ + +struct pebs_basic { + u64 format_group:32, + retire_latency:16, + format_size:16; + u64 ip; + u64 applicable_counters; + u64 tsc; +}; + +struct pebs_meminfo { + u64 address; + u64 aux; + union { + /* pre Alder Lake */ + u64 mem_latency; + /* Alder Lake and later */ + struct { + u64 instr_latency:16; + u64 pad2:16; + u64 cache_latency:16; + u64 pad3:16; + }; + }; + u64 tsx_tuning; +}; + +struct pebs_gprs { + u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di; + u64 r8, r9, r10, r11, r12, r13, r14, r15; +}; + +struct pebs_xmm { + u64 xmm[16*2]; /* two entries for each register */ +}; + +struct pebs_cntr_header { + u32 cntr; + u32 fixed; + u32 metrics; + u32 reserved; +}; + +#define INTEL_CNTR_METRICS 0x3 + +/* + * Arch PEBS + */ +union arch_pebs_index { + struct { + u64 rsvd:4, + wr:23, + rsvd2:4, + full:1, + en:1, + rsvd3:3, + thresh:23, + rsvd4:5; + }; + u64 whole; +}; + +struct arch_pebs_header { + union { + u64 format; + struct { + u64 size:16, /* Record size */ + rsvd:14, + mode:1, /* 64BIT_MODE */ + cont:1, + rsvd2:3, + cntr:5, + lbr:2, + rsvd3:7, + xmm:1, + ymmh:1, + rsvd4:2, + opmask:1, + zmmh:1, + h16zmm:1, + rsvd5:5, + gpr:1, + aux:1, + basic:1; + }; + }; + u64 rsvd6; +}; + +struct arch_pebs_basic { + u64 ip; + u64 applicable_counters; + u64 tsc; + u64 retire :16, /* Retire Latency */ + valid :1, + rsvd :47; + u64 rsvd2; + u64 rsvd3; +}; + +struct arch_pebs_aux { + u64 address; + u64 rsvd; + u64 rsvd2; + u64 rsvd3; + u64 rsvd4; + u64 aux; + u64 instr_latency :16, + pad2 :16, + cache_latency :16, + pad3 :16; + u64 tsx_tuning; +}; + +struct arch_pebs_gprs { + u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di; + u64 r8, r9, r10, r11, r12, r13, r14, r15, ssp; + u64 rsvd; +}; + +struct arch_pebs_xer_header { + u64 xstate; + u64 rsvd; +}; + +#define ARCH_PEBS_LBR_NAN 0x0 +#define ARCH_PEBS_LBR_NUM_8 0x1 +#define ARCH_PEBS_LBR_NUM_16 0x2 +#define ARCH_PEBS_LBR_NUM_VAR 0x3 +#define ARCH_PEBS_BASE_LBR_ENTRIES 8 +struct arch_pebs_lbr_header { + u64 rsvd; + u64 ctl; + u64 depth; + u64 ler_from; + u64 ler_to; + u64 ler_info; +}; + +struct arch_pebs_cntr_header { + u32 cntr; + u32 fixed; + u32 metrics; + u32 reserved; +}; + +/* + * AMD Extended Performance Monitoring and Debug cpuid feature detection + */ +#define EXT_PERFMON_DEBUG_FEATURES 0x80000022 /* * IBS cpuid feature detection @@ -197,6 +636,9 @@ struct x86_pmu_capability { #define IBS_CAPS_OPBRNFUSE (1U<<8) #define IBS_CAPS_FETCHCTLEXTD (1U<<9) #define IBS_CAPS_OPDATA4 (1U<<10) +#define IBS_CAPS_ZEN4 (1U<<11) +#define IBS_CAPS_OPLDLAT (1U<<12) +#define IBS_CAPS_OPDTLBPGSIZE (1U<<19) #define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ | IBS_CAPS_FETCHSAM \ @@ -209,27 +651,39 @@ struct x86_pmu_capability { #define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) #define IBSCTL_LVT_OFFSET_MASK 0x0F -/* ibs fetch bits/masks */ +/* IBS fetch bits/masks */ +#define IBS_FETCH_L3MISSONLY (1ULL<<59) #define IBS_FETCH_RAND_EN (1ULL<<57) #define IBS_FETCH_VAL (1ULL<<49) #define IBS_FETCH_ENABLE (1ULL<<48) #define IBS_FETCH_CNT 0xFFFF0000ULL #define IBS_FETCH_MAX_CNT 0x0000FFFFULL -/* ibs op bits/masks */ -/* lower 4 bits of the current count are ignored: */ -#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32) +/* + * IBS op bits/masks + * The lower 7 bits of the current count are random bits + * preloaded by hardware and ignored in software + */ +#define IBS_OP_LDLAT_EN (1ULL<<63) +#define IBS_OP_LDLAT_THRSH (0xFULL<<59) +#define IBS_OP_CUR_CNT (0xFFF80ULL<<32) +#define IBS_OP_CUR_CNT_RAND (0x0007FULL<<32) 
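
The two masks directly above split the current-count field at bit 32:
IBS_OP_CUR_CNT keeps bits 51:39, IBS_OP_CUR_CNT_RAND covers the seven
hardware-randomized low bits. A sketch of reading the deterministic
part (helper name made up):

	static u64 ibs_op_cur_cnt_sketch(u64 op_ctl)
	{
		/* Drop the 7 preloaded random bits, keep the real count. */
		return (op_ctl & IBS_OP_CUR_CNT) >> 32;
	}
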
+#define IBS_OP_CUR_CNT_EXT_MASK (0x7FULL<<52) #define IBS_OP_CNT_CTL (1ULL<<19) #define IBS_OP_VAL (1ULL<<18) #define IBS_OP_ENABLE (1ULL<<17) +#define IBS_OP_L3MISSONLY (1ULL<<16) #define IBS_OP_MAX_CNT 0x0000FFFFULL #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ +#define IBS_OP_MAX_CNT_EXT_MASK (0x7FULL<<20) /* separate upper 7 bits */ #define IBS_RIP_INVALID (1ULL<<38) #ifdef CONFIG_X86_LOCAL_APIC extern u32 get_ibs_caps(void); +extern int forward_event_to_ibs(struct perf_event *event); #else static inline u32 get_ibs_caps(void) { return 0; } +static inline int forward_event_to_ibs(struct perf_event *event) { return -ENOENT; } #endif #ifdef CONFIG_PERF_EVENTS @@ -248,26 +702,28 @@ extern void perf_events_lapic_init(void); #define PERF_EFLAGS_VM (1UL << 5) struct pt_regs; -extern unsigned long perf_instruction_pointer(struct pt_regs *regs); -extern unsigned long perf_misc_flags(struct pt_regs *regs); -#define perf_misc_flags(regs) perf_misc_flags(regs) +struct x86_perf_regs { + struct pt_regs regs; + u64 *xmm_regs; +}; + +extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_arch_misc_flags(struct pt_regs *regs); +extern unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs); +#define perf_arch_misc_flags(regs) perf_arch_misc_flags(regs) +#define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs) #include <asm/stacktrace.h> /* - * We abuse bit 3 from flags to pass exact information, see perf_misc_flags - * and the comment with PERF_EFLAGS_EXACT. + * We abuse bit 3 from flags to pass exact information, see + * perf_arch_misc_flags() and the comment with PERF_EFLAGS_EXACT. */ #define perf_arch_fetch_caller_regs(regs, __ip) { \ (regs)->ip = (__ip); \ - (regs)->bp = caller_frame_pointer(); \ + (regs)->sp = (unsigned long)__builtin_frame_address(0); \ (regs)->cs = __KERNEL_CS; \ regs->flags = 0; \ - asm volatile( \ - _ASM_MOV "%%"_ASM_SP ", %0\n" \ - : "=m" ((regs)->sp) \ - :: "memory" \ - ); \ } struct perf_guest_switch_msr { @@ -275,33 +731,78 @@ struct perf_guest_switch_msr { u64 host, guest; }; -extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); +struct x86_pmu_lbr { + unsigned int nr; + unsigned int from; + unsigned int to; + unsigned int info; + bool has_callstack; +}; + extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); +extern u64 perf_get_hw_event_config(int hw_event); extern void perf_check_microcode(void); +extern void perf_clear_dirty_counters(void); extern int x86_perf_rdpmc_index(struct perf_event *event); #else -static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) +static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) { - *nr = 0; - return NULL; + memset(cap, 0, sizeof(*cap)); } -static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) +static inline u64 perf_get_hw_event_config(int hw_event) { - memset(cap, 0, sizeof(*cap)); + return 0; } static inline void perf_events_lapic_init(void) { } static inline void perf_check_microcode(void) { } #endif +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) +extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data); +extern void x86_perf_get_lbr(struct x86_pmu_lbr *lbr); +#else +struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data); +static inline void x86_perf_get_lbr(struct x86_pmu_lbr *lbr) +{ + memset(lbr, 0, sizeof(*lbr)); +} +#endif + #ifdef CONFIG_CPU_SUP_INTEL extern void 
intel_pt_handle_vmx(int on); +#else +static inline void intel_pt_handle_vmx(int on) +{ + +} #endif #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) extern void amd_pmu_enable_virt(void); extern void amd_pmu_disable_virt(void); + +#if defined(CONFIG_PERF_EVENTS_AMD_BRS) + +#define PERF_NEEDS_LOPWR_CB 1 + +/* + * architectural low power callback impacts + * drivers/acpi/processor_idle.c + * drivers/acpi/acpi_pad.c + */ +extern void perf_amd_brs_lopwr_cb(bool lopwr_in); + +DECLARE_STATIC_CALL(perf_lopwr_cb, perf_amd_brs_lopwr_cb); + +static __always_inline void perf_lopwr_cb(bool lopwr_in) +{ + static_call_mod(perf_lopwr_cb)(lopwr_in); +} + +#endif /* PERF_NEEDS_LOPWR_CB */ + #else static inline void amd_pmu_enable_virt(void) { } static inline void amd_pmu_disable_virt(void) { } diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 94de1a05aeba..d65e338b6a5f 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -181,7 +181,7 @@ static inline u64 p4_clear_ht_bit(u64 config) static inline int p4_ht_active(void) { #ifdef CONFIG_SMP - return smp_num_siblings > 1; + return __max_threads_per_core > 1; #endif return 0; } @@ -189,7 +189,7 @@ static inline int p4_ht_active(void) static inline int p4_ht_thread(int cpu) { #ifdef CONFIG_SMP - if (smp_num_siblings == 2) + if (__max_threads_per_core == 2) return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); #endif return 0; diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index a281e61ec60c..c88691b15f3c 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -6,6 +6,12 @@ #include <linux/mm.h> /* for struct page */ #include <linux/pagemap.h> +#include <asm/cpufeature.h> + +#define __HAVE_ARCH_PTE_ALLOC_ONE +#define __HAVE_ARCH_PGD_FREE +#include <asm-generic/pgalloc.h> + static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; } #ifdef CONFIG_PARAVIRT_XXL @@ -26,20 +32,16 @@ static inline void paravirt_release_p4d(unsigned long pfn) {} #endif /* - * Flags to use when allocating a user page table page. - */ -extern gfp_t __userpte_alloc_gfp; - -#ifdef CONFIG_PAGE_TABLE_ISOLATION -/* - * Instead of one PGD, we acquire two PGDs. Being order-1, it is - * both 8k in size and 8k-aligned. That lets us just flip bit 12 - * in a pointer to swap between the two 4k halves. + * In case of Page Table Isolation active, we acquire two PGDs instead of one. + * Being order-1, it is both 8k in size and 8k-aligned. That lets us just + * flip bit 12 in a pointer to swap between the two 4k halves. */ -#define PGD_ALLOCATION_ORDER 1 -#else -#define PGD_ALLOCATION_ORDER 0 -#endif +static inline unsigned int pgd_allocation_order(void) +{ + if (cpu_feature_enabled(X86_FEATURE_PTI)) + return 1; + return 0; +} /* * Allocate and free page tables. @@ -47,24 +49,8 @@ extern gfp_t __userpte_alloc_gfp; extern pgd_t *pgd_alloc(struct mm_struct *); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); -extern pte_t *pte_alloc_one_kernel(struct mm_struct *); extern pgtable_t pte_alloc_one(struct mm_struct *); -/* Should really implement gc for free page table pages. This could be - done with a reference count in struct page. 
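
With the PGD_ALLOCATION_ORDER constant gone, callers ask
pgd_allocation_order() at allocation time instead. A sketch of the call
site shape, assuming the GFP_PGTABLE_USER flags that
asm-generic/pgalloc.h provides:

	/* Sketch: allocate a PGD honouring the PTI-aware order. */
	static pgd_t *pgd_alloc_sketch(void)
	{
		return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
						 pgd_allocation_order());
	}
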
*/ - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - BUG_ON((unsigned long)pte & (PAGE_SIZE-1)); - free_page((unsigned long)pte); -} - -static inline void pte_free(struct mm_struct *mm, struct page *pte) -{ - pgtable_page_dtor(pte); - __free_page(pte); -} - extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte); static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, @@ -96,33 +82,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE)); } -#define pmd_pgtable(pmd) pmd_page(pmd) - #if CONFIG_PGTABLE_LEVELS > 2 -static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) -{ - struct page *page; - gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO; - - if (mm == &init_mm) - gfp &= ~__GFP_ACCOUNT; - page = alloc_pages(gfp, 0); - if (!page) - return NULL; - if (!pgtable_pmd_page_ctor(page)) { - __free_pages(page, 0); - return NULL; - } - return (pmd_t *)page_address(page); -} - -static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) -{ - BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); - pgtable_pmd_page_dtor(virt_to_page(pmd)); - free_page((unsigned long)pmd); -} - extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd); static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, @@ -160,21 +120,6 @@ static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, pud_t *pu set_p4d_safe(p4d, __p4d(_PAGE_TABLE | __pa(pud))); } -static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) -{ - gfp_t gfp = GFP_KERNEL_ACCOUNT; - - if (mm == &init_mm) - gfp &= ~__GFP_ACCOUNT; - return (pud_t *)get_zeroed_page(gfp); -} - -static inline void pud_free(struct mm_struct *mm, pud_t *pud) -{ - BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); - free_page((unsigned long)pud); -} - extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud); static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, @@ -200,24 +145,6 @@ static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4 set_pgd_safe(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); } -static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr) -{ - gfp_t gfp = GFP_KERNEL_ACCOUNT; - - if (mm == &init_mm) - gfp &= ~__GFP_ACCOUNT; - return (p4d_t *)get_zeroed_page(gfp); -} - -static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) -{ - if (!pgtable_l5_enabled()) - return; - - BUG_ON((unsigned long)p4d & (PAGE_SIZE-1)); - free_page((unsigned long)p4d); -} - extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d); static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 60d0f9015317..e9482a11ac52 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -80,21 +80,37 @@ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshi return ((value >> rightshift) & mask) << leftshift; } -/* Encode and de-code a swap entry */ +/* + * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that + * are !pte_none() && !pte_present(). + * + * Format of swap PTEs: + * + * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * <----------------- offset ------------------> 0 E <- type --> 0 + * + * E is the exclusive marker that is not stored in swap entries. 
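
Given that bit layout, the __swp_*() macros that follow must round-trip
any (type, offset) pair that fits. A quick property sketch (helper name
made up):

	static void swp_roundtrip_check(unsigned int type, unsigned long offset)
	{
		swp_entry_t e = __swp_entry(type, offset);

		WARN_ON(__swp_type(e) != type || __swp_offset(e) != offset);
	}
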
+ */ #define SWP_TYPE_BITS 5 +#define _SWP_TYPE_MASK ((1U << SWP_TYPE_BITS) - 1) +#define _SWP_TYPE_SHIFT (_PAGE_BIT_PRESENT + 1) #define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) -#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) +#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5) -#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \ - & ((1U << SWP_TYPE_BITS) - 1)) +#define __swp_type(x) (((x).val >> _SWP_TYPE_SHIFT) \ + & _SWP_TYPE_MASK) #define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT) #define __swp_entry(type, offset) ((swp_entry_t) { \ - ((type) << (_PAGE_BIT_PRESENT + 1)) \ + (((type) & _SWP_TYPE_MASK) << _SWP_TYPE_SHIFT) \ | ((offset) << SWP_OFFSET_SHIFT) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) +/* We borrow bit 7 to store the exclusive marker in swap PTEs. */ +#define _PAGE_SWP_EXCLUSIVE _PAGE_PSE + /* No inverted PFNs on 2 level page tables */ static inline u64 protnone_mask(u64 val) diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index 6deb6cd236e3..54690bd4ddbe 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_PGTABLE_2LEVEL_DEFS_H #define _ASM_X86_PGTABLE_2LEVEL_DEFS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> typedef unsigned long pteval_t; @@ -16,22 +16,22 @@ typedef union { pteval_t pte; pteval_t pte_low; } pte_t; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ -#define SHARED_KERNEL_PMD 0 +#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED /* - * traditional i386 two-level paging structure: + * Traditional i386 two-level paging structure: */ #define PGDIR_SHIFT 22 #define PTRS_PER_PGD 1024 - /* - * the i386 is two-level, so we don't really have any - * PMD directory physically. + * The i386 is two-level, so we don't really have any + * PMD directory physically: */ +#define PTRS_PER_PMD 1 #define PTRS_PER_PTE 1024 diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index f8b1ad2c3828..dabafba957ea 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -2,8 +2,6 @@ #ifndef _ASM_X86_PGTABLE_3LEVEL_H #define _ASM_X86_PGTABLE_3LEVEL_H -#include <asm/atomic64_32.h> - /* * Intel Physical Address Extension (PAE) Mode - three-level page * tables on PPro+ CPUs. @@ -21,7 +19,15 @@ pr_err("%s:%d: bad pgd %p(%016Lx)\n", \ __FILE__, __LINE__, &(e), pgd_val(e)) -/* Rules for using set_pte: the pte being assigned *must* be +#define pxx_xchg64(_pxx, _ptr, _val) ({ \ + _pxx##val_t *_p = (_pxx##val_t *)_ptr; \ + _pxx##val_t _o = *_p; \ + do { } while (!try_cmpxchg64(_p, &_o, (_val))); \ + native_make_##_pxx(_o); \ +}) + +/* + * Rules for using set_pte: the pte being assigned *must* be * either not present or in a state where the hardware will * not attempt to update the pte. In places where this is * not possible, use pte_get_and_clear to obtain the old pte @@ -29,81 +35,27 @@ */ static inline void native_set_pte(pte_t *ptep, pte_t pte) { - ptep->pte_high = pte.pte_high; + WRITE_ONCE(ptep->pte_high, pte.pte_high); smp_wmb(); - ptep->pte_low = pte.pte_low; -} - -#define pmd_read_atomic pmd_read_atomic -/* - * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with - * a "*pmdp" dereference done by gcc. 
Problem is, in certain places - * where pte_offset_map_lock is called, concurrent page faults are - * allowed, if the mmap_sem is hold for reading. An example is mincore - * vs page faults vs MADV_DONTNEED. On the page fault side - * pmd_populate rightfully does a set_64bit, but if we're reading the - * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen - * because gcc will not read the 64bit of the pmd atomically. To fix - * this all places running pmd_offset_map_lock() while holding the - * mmap_sem in read mode, shall read the pmdp pointer using this - * function to know if the pmd is null nor not, and in turn to know if - * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd - * operations. - * - * Without THP if the mmap_sem is hold for reading, the pmd can only - * transition from null to not null while pmd_read_atomic runs. So - * we can always return atomic pmd values with this function. - * - * With THP if the mmap_sem is hold for reading, the pmd can become - * trans_huge or none or point to a pte (and in turn become "stable") - * at any time under pmd_read_atomic. We could read it really - * atomically here with a atomic64_read for the THP enabled case (and - * it would be a whole lot simpler), but to avoid using cmpxchg8b we - * only return an atomic pmdval if the low part of the pmdval is later - * found stable (i.e. pointing to a pte). And we're returning a none - * pmdval if the low part of the pmd is none. In some cases the high - * and low part of the pmdval returned may not be consistent if THP is - * enabled (the low part may point to previously mapped hugepage, - * while the high part may point to a more recently mapped hugepage), - * but pmd_none_or_trans_huge_or_clear_bad() only needs the low part - * of the pmd to be read atomically to decide if the pmd is unstable - * or not, with the only exception of when the low part of the pmd is - * zero in which case we return a none pmd. - */ -static inline pmd_t pmd_read_atomic(pmd_t *pmdp) -{ - pmdval_t ret; - u32 *tmp = (u32 *)pmdp; - - ret = (pmdval_t) (*tmp); - if (ret) { - /* - * If the low part is null, we must not read the high part - * or we can end up with a partial pmd. 
- */ - smp_rmb(); - ret |= ((pmdval_t)*(tmp + 1)) << 32; - } - - return (pmd_t) { ret }; + WRITE_ONCE(ptep->pte_low, pte.pte_low); } static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) { - set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); + pxx_xchg64(pte, ptep, native_pte_val(pte)); } static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { - set_64bit((unsigned long long *)(pmdp), native_pmd_val(pmd)); + pxx_xchg64(pmd, pmdp, native_pmd_val(pmd)); } static inline void native_set_pud(pud_t *pudp, pud_t pud) { -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION pud.p4d.pgd = pti_set_user_pgtbl(&pudp->p4d.pgd, pud.p4d.pgd); #endif - set_64bit((unsigned long long *)(pudp), native_pud_val(pud)); + pxx_xchg64(pud, pudp, native_pud_val(pud)); } /* @@ -114,17 +66,16 @@ static inline void native_set_pud(pud_t *pudp, pud_t pud) static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - ptep->pte_low = 0; + WRITE_ONCE(ptep->pte_low, 0); smp_wmb(); - ptep->pte_high = 0; + WRITE_ONCE(ptep->pte_high, 0); } -static inline void native_pmd_clear(pmd_t *pmd) +static inline void native_pmd_clear(pmd_t *pmdp) { - u32 *tmp = (u32 *)pmd; - *tmp = 0; + WRITE_ONCE(pmdp->pmd_low, 0); smp_wmb(); - *(tmp + 1) = 0; + WRITE_ONCE(pmdp->pmd_high, 0); } static inline void native_pud_clear(pud_t *pudp) @@ -147,41 +98,26 @@ static inline void pud_clear(pud_t *pudp) */ } + #ifdef CONFIG_SMP static inline pte_t native_ptep_get_and_clear(pte_t *ptep) { - pte_t res; - - res.pte = (pteval_t)arch_atomic64_xchg((atomic64_t *)ptep, 0); - - return res; + return pxx_xchg64(pte, ptep, 0ULL); } -#else -#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) -#endif - -union split_pmd { - struct { - u32 pmd_low; - u32 pmd_high; - }; - pmd_t pmd; -}; -#ifdef CONFIG_SMP static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp) { - union split_pmd res, *orig = (union split_pmd *)pmdp; - - /* xchg acts as a barrier before setting of the high bits */ - res.pmd_low = xchg(&orig->pmd_low, 0); - res.pmd_high = orig->pmd_high; - orig->pmd_high = 0; + return pxx_xchg64(pmd, pmdp, 0ULL); +} - return res.pmd; +static inline pud_t native_pudp_get_and_clear(pud_t *pudp) +{ + return pxx_xchg64(pud, pudp, 0ULL); } #else +#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) +#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp) #endif #ifndef pmdp_establish @@ -197,67 +133,47 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, * anybody. 
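
The pxx_xchg64() helper these setters now share is a try_cmpxchg64()
loop at heart; its shape in plain C (sketch, name made up):

	static u64 xchg64_sketch(u64 *p, u64 newval)
	{
		u64 old = *p;

		do {
			/* try_cmpxchg64() refreshes 'old' on failure */
		} while (!try_cmpxchg64(p, &old, newval));

		return old;
	}
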
*/ if (!(pmd_val(pmd) & _PAGE_PRESENT)) { - union split_pmd old, new, *ptr; - - ptr = (union split_pmd *)pmdp; - - new.pmd = pmd; - /* xchg acts as a barrier before setting of the high bits */ - old.pmd_low = xchg(&ptr->pmd_low, new.pmd_low); - old.pmd_high = ptr->pmd_high; - ptr->pmd_high = new.pmd_high; - return old.pmd; - } - - do { - old = *pmdp; - } while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd); + old.pmd_low = xchg(&pmdp->pmd_low, pmd.pmd_low); + old.pmd_high = READ_ONCE(pmdp->pmd_high); + WRITE_ONCE(pmdp->pmd_high, pmd.pmd_high); - return old; -} -#endif - -#ifdef CONFIG_SMP -union split_pud { - struct { - u32 pud_low; - u32 pud_high; - }; - pud_t pud; -}; - -static inline pud_t native_pudp_get_and_clear(pud_t *pudp) -{ - union split_pud res, *orig = (union split_pud *)pudp; - -#ifdef CONFIG_PAGE_TABLE_ISOLATION - pti_set_user_pgtbl(&pudp->p4d.pgd, __pgd(0)); -#endif - - /* xchg acts as a barrier before setting of the high bits */ - res.pud_low = xchg(&orig->pud_low, 0); - res.pud_high = orig->pud_high; - orig->pud_high = 0; + return old; + } - return res.pud; + return pxx_xchg64(pmd, pmdp, pmd.pmd); } -#else -#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp) #endif -/* Encode and de-code a swap entry */ +/* + * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that + * are !pte_none() && !pte_present(). + * + * Format of swap PTEs: + * + * 6 6 6 6 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 3 3 3 3 3 3 3 3 + * 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 + * < type -> <---------------------- offset ---------------------- + * + * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * --------------------------------------------> 0 E 0 0 0 0 0 0 0 + * + * E is the exclusive marker that is not stored in swap entries. + */ #define SWP_TYPE_BITS 5 +#define _SWP_TYPE_MASK ((1U << SWP_TYPE_BITS) - 1) #define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) /* We always extract/encode the offset by shifting it all the way up, and then down again */ #define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS) -#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5) -#define __swp_type(x) (((x).val) & 0x1f) -#define __swp_offset(x) ((x).val >> 5) -#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5}) +#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) +#define __swp_type(x) (((x).val) & _SWP_TYPE_MASK) +#define __swp_offset(x) ((x).val >> SWP_TYPE_BITS) +#define __swp_entry(type, offset) ((swp_entry_t){((type) & _SWP_TYPE_MASK) \ + | (offset) << SWP_TYPE_BITS}) /* * Normally, __swp_entry() converts from arch-independent swp_entry_t to @@ -285,52 +201,8 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp) #define __pte_to_swp_entry(pte) (__swp_entry(__pteval_swp_type(pte), \ __pteval_swp_offset(pte))) -#define gup_get_pte gup_get_pte -/* - * WARNING: only to be used in the get_user_pages_fast() implementation. - * - * With get_user_pages_fast(), we walk down the pagetables without taking - * any locks. For this we would like to load the pointers atomically, - * but that is not possible (without expensive cmpxchg8b) on PAE. 
What - * we do have is the guarantee that a PTE will only either go from not - * present to present, or present to not present or both -- it will not - * switch to a completely different present page without a TLB flush in - * between; something that we are blocking by holding interrupts off. - * - * Setting ptes from not present to present goes: - * - * ptep->pte_high = h; - * smp_wmb(); - * ptep->pte_low = l; - * - * And present to not present goes: - * - * ptep->pte_low = 0; - * smp_wmb(); - * ptep->pte_high = 0; - * - * We must ensure here that the load of pte_low sees 'l' iff pte_high - * sees 'h'. We load pte_high *after* loading pte_low, which ensures we - * don't see an older value of pte_high. *Then* we recheck pte_low, - * which ensures that we haven't picked up a changed pte high. We might - * have gotten rubbish values from pte_low and pte_high, but we are - * guaranteed that pte_low will not have the present bit set *unless* - * it is 'l'. Because get_user_pages_fast() only operates on present ptes - * we're safe. - */ -static inline pte_t gup_get_pte(pte_t *ptep) -{ - pte_t pte; - - do { - pte.pte_low = ptep->pte_low; - smp_rmb(); - pte.pte_high = ptep->pte_high; - smp_rmb(); - } while (unlikely(pte.pte_low != ptep->pte_low)); - - return pte; -} +/* We borrow bit 7 to store the exclusive marker in swap PTEs. */ +#define _PAGE_SWP_EXCLUSIVE _PAGE_PSE #include <asm/pgtable-invert.h> diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index 33845d36897c..580b09bf6a45 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H #define _ASM_X86_PGTABLE_3LEVEL_DEFS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> typedef u64 pteval_t; @@ -18,14 +18,16 @@ typedef union { }; pteval_t pte; } pte_t; -#endif /* !__ASSEMBLY__ */ - -#ifdef CONFIG_PARAVIRT_XXL -#define SHARED_KERNEL_PMD ((!static_cpu_has(X86_FEATURE_PTI) && \ - (pv_info.shared_kernel_pmd))) -#else -#define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI)) -#endif + +typedef union { + struct { + unsigned long pmd_low, pmd_high; + }; + pmdval_t pmd; +} pmd_t; +#endif /* !__ASSEMBLER__ */ + +#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED /* * PGDIR_SHIFT determines what a top-level page table entry can map diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h index a0c1525f1b6f..e12e52ae8083 100644 --- a/arch/x86/include/asm/pgtable-invert.h +++ b/arch/x86/include/asm/pgtable-invert.h @@ -2,7 +2,7 @@ #ifndef _ASM_PGTABLE_INVERT_H #define _ASM_PGTABLE_INVERT_H 1 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * A clear pte value is special, and doesn't get inverted. 
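
The pmd_t union added to pgtable-3level_types.h above is what lets the PAE helpers address one 64-bit entry as two 32-bit halves, replacing the old file-local union split_pmd. Because _PAGE_PRESENT sits in the low word, stores make an entry non-present first (clear low, then high) and present last (set high, then low), exactly the ordering the removed gup_get_pte() comment relied on. A standalone model of the layout, assuming little-endian x86:

#include <assert.h>
#include <stdint.h>

typedef union {
	struct {
		uint32_t pmd_low, pmd_high;
	};
	uint64_t pmd;
} pmd_model;			/* models the new pmd_t */

int main(void)
{
	pmd_model m = { .pmd = 0x8000000012345067ULL };

	/* On little-endian x86 the first member aliases the low word. */
	assert(m.pmd_low == 0x12345067u);
	assert(m.pmd_high == 0x80000000u);
	return 0;
}
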
@@ -36,6 +36,6 @@ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask) return val; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 40616e805292..e33df3da6980 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -15,28 +15,35 @@ cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \ : (prot)) -/* - * Macros to add or remove encryption attribute - */ -#define pgprot_encrypted(prot) __pgprot(__sme_set(pgprot_val(prot))) -#define pgprot_decrypted(prot) __pgprot(__sme_clr(pgprot_val(prot))) - -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ +#include <linux/spinlock.h> #include <asm/x86_init.h> +#include <asm/pkru.h> +#include <asm/fpu/api.h> +#include <asm/coco.h> +#include <asm-generic/pgtable_uffd.h> +#include <linux/page_table_check.h> extern pgd_t early_top_pgt[PTRS_PER_PGD]; -int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); - -void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); -void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user); -void ptdump_walk_pgd_level_checkwx(void); +bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd); + +struct seq_file; +void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm); +void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, + bool user); +bool ptdump_walk_pgd_level_checkwx(void); +#define ptdump_check_wx ptdump_walk_pgd_level_checkwx void ptdump_walk_user_pgd_level_checkwx(void); +/* + * Macros to add or remove encryption attribute + */ +#define pgprot_encrypted(prot) __pgprot(cc_mkenc(pgprot_val(prot))) +#define pgprot_decrypted(prot) __pgprot(cc_mkdec(pgprot_val(prot))) + #ifdef CONFIG_DEBUG_WX -#define debug_checkwx() ptdump_walk_pgd_level_checkwx() #define debug_checkwx_user() ptdump_walk_user_pgd_level_checkwx() #else -#define debug_checkwx() do { } while (0) #define debug_checkwx_user() do { } while (0) #endif @@ -46,7 +53,7 @@ void ptdump_walk_user_pgd_level_checkwx(void); */ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __visible; -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) +#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page)) extern spinlock_t pgd_lock; extern struct list_head pgd_list; @@ -59,7 +66,6 @@ extern pmdval_t early_pmd_flags; #include <asm/paravirt.h> #else /* !CONFIG_PARAVIRT_XXL */ #define set_pte(ptep, pte) native_set_pte(ptep, pte) -#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) #define set_pte_atomic(ptep, pte) \ native_set_pte_atomic(ptep, pte) @@ -114,47 +120,81 @@ extern pmdval_t early_pmd_flags; #define arch_end_context_switch(prev) do {} while(0) #endif /* CONFIG_PARAVIRT_XXL */ +static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) +{ + pmdval_t v = native_pmd_val(pmd); + + return native_make_pmd(v | set); +} + +static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) +{ + pmdval_t v = native_pmd_val(pmd); + + return native_make_pmd(v & ~clear); +} + +static inline pud_t pud_set_flags(pud_t pud, pudval_t set) +{ + pudval_t v = native_pud_val(pud); + + return native_make_pud(v | set); +} + +static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) +{ + pudval_t v = native_pud_val(pud); + + return native_make_pud(v & ~clear); +} + /* * The following only work if pte_present() is true. * Undefined behaviour if not.. 
*/ -static inline int pte_dirty(pte_t pte) +static inline bool pte_dirty(pte_t pte) { - return pte_flags(pte) & _PAGE_DIRTY; + return pte_flags(pte) & _PAGE_DIRTY_BITS; } +static inline bool pte_shstk(pte_t pte) +{ + return cpu_feature_enabled(X86_FEATURE_SHSTK) && + (pte_flags(pte) & (_PAGE_RW | _PAGE_DIRTY)) == _PAGE_DIRTY; +} -static inline u32 read_pkru(void) +static inline int pte_young(pte_t pte) { - if (boot_cpu_has(X86_FEATURE_OSPKE)) - return __read_pkru(); - return 0; + return pte_flags(pte) & _PAGE_ACCESSED; } -static inline void write_pkru(u32 pkru) +static inline bool pte_decrypted(pte_t pte) { - if (boot_cpu_has(X86_FEATURE_OSPKE)) - __write_pkru(pkru); + return cc_mkdec(pte_val(pte)) == pte_val(pte); } -static inline int pte_young(pte_t pte) +#define pmd_dirty pmd_dirty +static inline bool pmd_dirty(pmd_t pmd) { - return pte_flags(pte) & _PAGE_ACCESSED; + return pmd_flags(pmd) & _PAGE_DIRTY_BITS; } -static inline int pmd_dirty(pmd_t pmd) +static inline bool pmd_shstk(pmd_t pmd) { - return pmd_flags(pmd) & _PAGE_DIRTY; + return cpu_feature_enabled(X86_FEATURE_SHSTK) && + (pmd_flags(pmd) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) == + (_PAGE_DIRTY | _PAGE_PSE); } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_ACCESSED; } -static inline int pud_dirty(pud_t pud) +static inline bool pud_dirty(pud_t pud) { - return pud_flags(pud) & _PAGE_DIRTY; + return pud_flags(pud) & _PAGE_DIRTY_BITS; } static inline int pud_young(pud_t pud) @@ -162,9 +202,36 @@ static inline int pud_young(pud_t pud) return pud_flags(pud) & _PAGE_ACCESSED; } +static inline bool pud_shstk(pud_t pud) +{ + return cpu_feature_enabled(X86_FEATURE_SHSTK) && + (pud_flags(pud) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) == + (_PAGE_DIRTY | _PAGE_PSE); +} + static inline int pte_write(pte_t pte) { - return pte_flags(pte) & _PAGE_RW; + /* + * Shadow stack pages are logically writable, but do not have + * _PAGE_RW. Check for them separately from _PAGE_RW itself. + */ + return (pte_flags(pte) & _PAGE_RW) || pte_shstk(pte); +} + +#define pmd_write pmd_write +static inline int pmd_write(pmd_t pmd) +{ + /* + * Shadow stack pages are logically writable, but do not have + * _PAGE_RW. Check for them separately from _PAGE_RW itself. 
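
pte_shstk() and friends above key off the one flag combination the hardware reserves for shadow stacks: Write=0 together with Dirty=1. A userspace model of the predicate (the real helpers additionally require X86_FEATURE_SHSTK, and the PMD/PUD variants also check _PAGE_PSE):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define _PAGE_RW	(1ULL << 1)
#define _PAGE_DIRTY	(1ULL << 6)

/* Model of pte_shstk(): shadow stack is the Write=0,Dirty=1 encoding. */
static bool is_shstk(uint64_t flags)
{
	return (flags & (_PAGE_RW | _PAGE_DIRTY)) == _PAGE_DIRTY;
}

int main(void)
{
	assert(is_shstk(_PAGE_DIRTY));		   /* Write=0,Dirty=1 -> shstk */
	assert(!is_shstk(_PAGE_RW | _PAGE_DIRTY)); /* ordinary writable page   */
	assert(!is_shstk(0));			   /* read-only, clean         */
	return 0;
}

This is also why pte_write() above reports shadow-stack pages as writable even though _PAGE_RW is clear: they are logically writable, just not through ordinary stores.
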
+ */ + return (pmd_flags(pmd) & _PAGE_RW) || pmd_shstk(pmd); +} + +#define pud_write pud_write +static inline int pud_write(pud_t pud) +{ + return pud_flags(pud) & _PAGE_RW; } static inline int pte_huge(pte_t pte) @@ -191,6 +258,8 @@ static inline int pte_special(pte_t pte) static inline u64 protnone_mask(u64 val); +#define PFN_PTE_SHIFT PAGE_SHIFT + static inline unsigned long pte_pfn(pte_t pte) { phys_addr_t pfn = pte_val(pte); @@ -205,6 +274,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT; } +#define pud_pfn pud_pfn static inline unsigned long pud_pfn(pud_t pud) { phys_addr_t pfn = pud_val(pud); @@ -222,15 +292,10 @@ static inline unsigned long pgd_pfn(pgd_t pgd) return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT; } -static inline int p4d_large(p4d_t p4d) -{ - /* No 512 GiB pages yet */ - return 0; -} - #define pte_page(pte) pfn_to_page(pte_pfn(pte)) -static inline int pmd_large(pmd_t pte) +#define pmd_leaf pmd_leaf +static inline bool pmd_leaf(pmd_t pte) { return pmd_flags(pte) & _PAGE_PSE; } @@ -238,13 +303,13 @@ static inline int pmd_large(pmd_t pte) #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline int pmd_trans_huge(pmd_t pmd) { - return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; + return (pmd_val(pmd) & _PAGE_PSE) == _PAGE_PSE; } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static inline int pud_trans_huge(pud_t pud) { - return (pud_val(pud) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; + return (pud_val(pud) & _PAGE_PSE) == _PAGE_PSE; } #endif @@ -254,29 +319,29 @@ static inline int has_transparent_hugepage(void) return boot_cpu_has(X86_FEATURE_PSE); } -#ifdef __HAVE_ARCH_PTE_DEVMAP -static inline int pmd_devmap(pmd_t pmd) +#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP +static inline bool pmd_special(pmd_t pmd) { - return !!(pmd_val(pmd) & _PAGE_DEVMAP); + return pmd_flags(pmd) & _PAGE_SPECIAL; } -#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD -static inline int pud_devmap(pud_t pud) +static inline pmd_t pmd_mkspecial(pmd_t pmd) { - return !!(pud_val(pud) & _PAGE_DEVMAP); + return pmd_set_flags(pmd, _PAGE_SPECIAL); } -#else -static inline int pud_devmap(pud_t pud) +#endif /* CONFIG_ARCH_SUPPORTS_PMD_PFNMAP */ + +#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP +static inline bool pud_special(pud_t pud) { - return 0; + return pud_flags(pud) & _PAGE_SPECIAL; } -#endif -static inline int pgd_devmap(pgd_t pgd) +static inline pud_t pud_mkspecial(pud_t pud) { - return 0; + return pud_set_flags(pud, _PAGE_SPECIAL); } -#endif +#endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline pte_t pte_set_flags(pte_t pte, pteval_t set) @@ -293,19 +358,90 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) return native_make_pte(v & ~clear); } -static inline pte_t pte_mkclean(pte_t pte) +/* + * Write protection operations can result in Dirty=1,Write=0 PTEs. But in the + * case of X86_FEATURE_USER_SHSTK, these PTEs denote shadow stack memory. So + * when creating dirty, write-protected memory, a software bit is used: + * _PAGE_BIT_SAVED_DIRTY. The following functions take a PTE and transition the + * Dirty bit to SavedDirty, and vice-vesra. + * + * This shifting is only done if needed. In the case of shifting + * Dirty->SavedDirty, the condition is if the PTE is Write=0. In the case of + * shifting SavedDirty->Dirty, the condition is Write=1. 
+ */ +static inline pgprotval_t mksaveddirty_shift(pgprotval_t v) +{ + pgprotval_t cond = (~v >> _PAGE_BIT_RW) & 1; + + v |= ((v >> _PAGE_BIT_DIRTY) & cond) << _PAGE_BIT_SAVED_DIRTY; + v &= ~(cond << _PAGE_BIT_DIRTY); + + return v; +} + +static inline pgprotval_t clear_saveddirty_shift(pgprotval_t v) { - return pte_clear_flags(pte, _PAGE_DIRTY); + pgprotval_t cond = (v >> _PAGE_BIT_RW) & 1; + + v |= ((v >> _PAGE_BIT_SAVED_DIRTY) & cond) << _PAGE_BIT_DIRTY; + v &= ~(cond << _PAGE_BIT_SAVED_DIRTY); + + return v; } -static inline pte_t pte_mkold(pte_t pte) +static inline pte_t pte_mksaveddirty(pte_t pte) { - return pte_clear_flags(pte, _PAGE_ACCESSED); + pteval_t v = native_pte_val(pte); + + v = mksaveddirty_shift(v); + return native_make_pte(v); +} + +static inline pte_t pte_clear_saveddirty(pte_t pte) +{ + pteval_t v = native_pte_val(pte); + + v = clear_saveddirty_shift(v); + return native_make_pte(v); } static inline pte_t pte_wrprotect(pte_t pte) { - return pte_clear_flags(pte, _PAGE_RW); + pte = pte_clear_flags(pte, _PAGE_RW); + + /* + * Blindly clearing _PAGE_RW might accidentally create + * a shadow stack PTE (Write=0,Dirty=1). Move the hardware + * dirty value to the software bit, if present. + */ + return pte_mksaveddirty(pte); +} + +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pte_uffd_wp(pte_t pte) +{ + return pte_flags(pte) & _PAGE_UFFD_WP; +} + +static inline pte_t pte_mkuffd_wp(pte_t pte) +{ + return pte_wrprotect(pte_set_flags(pte, _PAGE_UFFD_WP)); +} + +static inline pte_t pte_clear_uffd_wp(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + +static inline pte_t pte_mkclean(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_DIRTY_BITS); +} + +static inline pte_t pte_mkold(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_ACCESSED); } static inline pte_t pte_mkexec(pte_t pte) @@ -315,7 +451,16 @@ static inline pte_t pte_mkexec(pte_t pte) static inline pte_t pte_mkdirty(pte_t pte) { - return pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); + pte = pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); + + return pte_mksaveddirty(pte); +} + +static inline pte_t pte_mkwrite_shstk(pte_t pte) +{ + pte = pte_clear_flags(pte, _PAGE_RW); + + return pte_set_flags(pte, _PAGE_DIRTY); } static inline pte_t pte_mkyoung(pte_t pte) @@ -323,11 +468,15 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte_set_flags(pte, _PAGE_ACCESSED); } -static inline pte_t pte_mkwrite(pte_t pte) +static inline pte_t pte_mkwrite_novma(pte_t pte) { return pte_set_flags(pte, _PAGE_RW); } +struct vm_area_struct; +pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma); +#define pte_mkwrite pte_mkwrite + static inline pte_t pte_mkhuge(pte_t pte) { return pte_set_flags(pte, _PAGE_PSE); @@ -353,48 +502,75 @@ static inline pte_t pte_mkspecial(pte_t pte) return pte_set_flags(pte, _PAGE_SPECIAL); } -static inline pte_t pte_mkdevmap(pte_t pte) +/* See comments above mksaveddirty_shift() */ +static inline pmd_t pmd_mksaveddirty(pmd_t pmd) { - return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP); + pmdval_t v = native_pmd_val(pmd); + + v = mksaveddirty_shift(v); + return native_make_pmd(v); } -static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) +/* See comments above mksaveddirty_shift() */ +static inline pmd_t pmd_clear_saveddirty(pmd_t pmd) { pmdval_t v = native_pmd_val(pmd); - return native_make_pmd(v | set); + v = clear_saveddirty_shift(v); + return native_make_pmd(v); } -static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) 
+static inline pmd_t pmd_wrprotect(pmd_t pmd) { - pmdval_t v = native_pmd_val(pmd); + pmd = pmd_clear_flags(pmd, _PAGE_RW); - return native_make_pmd(v & ~clear); + /* + * Blindly clearing _PAGE_RW might accidentally create + * a shadow stack PMD (RW=0, Dirty=1). Move the hardware + * dirty value to the software bit. + */ + return pmd_mksaveddirty(pmd); } -static inline pmd_t pmd_mkold(pmd_t pmd) +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pmd_uffd_wp(pmd_t pmd) { - return pmd_clear_flags(pmd, _PAGE_ACCESSED); + return pmd_flags(pmd) & _PAGE_UFFD_WP; } -static inline pmd_t pmd_mkclean(pmd_t pmd) +static inline pmd_t pmd_mkuffd_wp(pmd_t pmd) { - return pmd_clear_flags(pmd, _PAGE_DIRTY); + return pmd_wrprotect(pmd_set_flags(pmd, _PAGE_UFFD_WP)); } -static inline pmd_t pmd_wrprotect(pmd_t pmd) +static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + +static inline pmd_t pmd_mkold(pmd_t pmd) { - return pmd_clear_flags(pmd, _PAGE_RW); + return pmd_clear_flags(pmd, _PAGE_ACCESSED); +} + +static inline pmd_t pmd_mkclean(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_DIRTY_BITS); } static inline pmd_t pmd_mkdirty(pmd_t pmd) { - return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); + pmd = pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); + + return pmd_mksaveddirty(pmd); } -static inline pmd_t pmd_mkdevmap(pmd_t pmd) +static inline pmd_t pmd_mkwrite_shstk(pmd_t pmd) { - return pmd_set_flags(pmd, _PAGE_DEVMAP); + pmd = pmd_clear_flags(pmd, _PAGE_RW); + + return pmd_set_flags(pmd, _PAGE_DIRTY); } static inline pmd_t pmd_mkhuge(pmd_t pmd) @@ -407,23 +583,30 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd) return pmd_set_flags(pmd, _PAGE_ACCESSED); } -static inline pmd_t pmd_mkwrite(pmd_t pmd) +static inline pmd_t pmd_mkwrite_novma(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_RW); } -static inline pud_t pud_set_flags(pud_t pud, pudval_t set) +pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); +#define pmd_mkwrite pmd_mkwrite + +/* See comments above mksaveddirty_shift() */ +static inline pud_t pud_mksaveddirty(pud_t pud) { pudval_t v = native_pud_val(pud); - return native_make_pud(v | set); + v = mksaveddirty_shift(v); + return native_make_pud(v); } -static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) +/* See comments above mksaveddirty_shift() */ +static inline pud_t pud_clear_saveddirty(pud_t pud) { pudval_t v = native_pud_val(pud); - return native_make_pud(v & ~clear); + v = clear_saveddirty_shift(v); + return native_make_pud(v); } static inline pud_t pud_mkold(pud_t pud) @@ -433,22 +616,26 @@ static inline pud_t pud_mkold(pud_t pud) static inline pud_t pud_mkclean(pud_t pud) { - return pud_clear_flags(pud, _PAGE_DIRTY); + return pud_clear_flags(pud, _PAGE_DIRTY_BITS); } static inline pud_t pud_wrprotect(pud_t pud) { - return pud_clear_flags(pud, _PAGE_RW); + pud = pud_clear_flags(pud, _PAGE_RW); + + /* + * Blindly clearing _PAGE_RW might accidentally create + * a shadow stack PUD (RW=0, Dirty=1). Move the hardware + * dirty value to the software bit. 
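
mksaveddirty_shift() above is branchless: cond is an all-or-nothing 1-bit mask derived from the Write bit, so the Dirty bit is relocated only when Write=0. A runnable model with the real bit numbers (RW=1, Dirty=6, SavedDirty=software bit 58 on 64-bit), checking both halves of the contract:

#include <assert.h>
#include <stdint.h>

#define RW	1	/* _PAGE_BIT_RW */
#define DIRTY	6	/* _PAGE_BIT_DIRTY */
#define SAVED	58	/* _PAGE_BIT_SAVED_DIRTY (SOFTW5 on 64-bit) */

/* Model of mksaveddirty_shift(): if Write=0, move Dirty -> SavedDirty. */
static uint64_t mksaveddirty(uint64_t v)
{
	uint64_t cond = (~v >> RW) & 1;	/* 1 iff Write=0 */

	v |= ((v >> DIRTY) & cond) << SAVED;
	v &= ~(cond << DIRTY);
	return v;
}

int main(void)
{
	/* Write=0,Dirty=1: the hardware Dirty bit is parked in SavedDirty. */
	assert(mksaveddirty(1ULL << DIRTY) == 1ULL << SAVED);

	/* Write=1 PTEs are left alone: Dirty=1 stays Dirty=1. */
	uint64_t w = (1ULL << RW) | (1ULL << DIRTY);
	assert(mksaveddirty(w) == w);
	return 0;
}
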
+ */ + return pud_mksaveddirty(pud); } static inline pud_t pud_mkdirty(pud_t pud) { - return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); -} + pud = pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); -static inline pud_t pud_mkdevmap(pud_t pud) -{ - return pud_set_flags(pud, _PAGE_DEVMAP); + return pud_mksaveddirty(pud); } static inline pud_t pud_mkhuge(pud_t pud) @@ -463,7 +650,9 @@ static inline pud_t pud_mkyoung(pud_t pud) static inline pud_t pud_mkwrite(pud_t pud) { - return pud_set_flags(pud, _PAGE_RW); + pud = pud_set_flags(pud, _PAGE_RW); + + return pud_clear_saveddirty(pud); } #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY @@ -548,6 +737,9 @@ static inline pgprotval_t check_pgprot(pgprot_t pgprot) static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; + /* This bit combination is used to mark shadow stacks */ + WARN_ON_ONCE((pgprot_val(pgprot) & (_PAGE_DIRTY | _PAGE_RW)) == + _PAGE_DIRTY); pfn ^= protnone_mask(pgprot_val(pgprot)); pfn &= PTE_PFN_MASK; return __pte(pfn | check_pgprot(pgprot)); @@ -569,16 +761,16 @@ static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot) return __pud(pfn | check_pgprot(pgprot)); } -static inline pmd_t pmd_mknotpresent(pmd_t pmd) +static inline pmd_t pmd_mkinvalid(pmd_t pmd) { return pfn_pmd(pmd_pfn(pmd), __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); } -static inline pud_t pud_mknotpresent(pud_t pud) +static inline pud_t pud_mkinvalid(pud_t pud) { return pfn_pud(pud_pfn(pud), - __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); + __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); } static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask); @@ -586,6 +778,7 @@ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask); static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pteval_t val = pte_val(pte), oldval = val; + pte_t pte_result; /* * Chop off the NX bit (if present), and add the NX portion of @@ -594,25 +787,82 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) val &= _PAGE_CHG_MASK; val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK; val = flip_protnone_guard(oldval, val, PTE_PFN_MASK); - return __pte(val); + + pte_result = __pte(val); + + /* + * To avoid creating Write=0,Dirty=1 PTEs, pte_modify() needs to avoid: + * 1. Marking Write=0 PTEs Dirty=1 + * 2. Marking Dirty=1 PTEs Write=0 + * + * The first case cannot happen because the _PAGE_CHG_MASK will filter + * out any Dirty bit passed in newprot. Handle the second case by + * going through the mksaveddirty exercise. Only do this if the old + * value was Write=1 to avoid doing this on Shadow Stack PTEs. + */ + if (oldval & _PAGE_RW) + pte_result = pte_mksaveddirty(pte_result); + else + pte_result = pte_clear_saveddirty(pte_result); + + return pte_result; } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { pmdval_t val = pmd_val(pmd), oldval = val; + pmd_t pmd_result; - val &= _HPAGE_CHG_MASK; + val &= (_HPAGE_CHG_MASK & ~_PAGE_DIRTY); val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK; val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK); - return __pmd(val); + + pmd_result = __pmd(val); + + /* + * Avoid creating shadow stack PMD by accident. See comment in + * pte_modify(). 
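
pud_mkwrite() above shows the make-writable pattern inline; for PTEs and PMDs the same operation moved out of line (the pte_mkwrite()/pmd_mkwrite() declarations earlier take a VMA) so it can pick the shadow-stack encoding per mapping. Upstream the PTE version lives in arch/x86/mm/pgtable.c and reads roughly as below; quoted from memory for context, not part of this diff:

pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
{
	if (vma->vm_flags & VM_SHADOW_STACK)
		return pte_mkwrite_shstk(pte);

	pte = pte_mkwrite_novma(pte);

	return pte_clear_saveddirty(pte);
}
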
+ */ + if (oldval & _PAGE_RW) + pmd_result = pmd_mksaveddirty(pmd_result); + else + pmd_result = pmd_clear_saveddirty(pmd_result); + + return pmd_result; +} + +static inline pud_t pud_modify(pud_t pud, pgprot_t newprot) +{ + pudval_t val = pud_val(pud), oldval = val; + pud_t pud_result; + + val &= _HPAGE_CHG_MASK; + val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK; + val = flip_protnone_guard(oldval, val, PHYSICAL_PUD_PAGE_MASK); + + pud_result = __pud(val); + + /* + * Avoid creating shadow stack PUD by accident. See comment in + * pte_modify(). + */ + if (oldval & _PAGE_RW) + pud_result = pud_mksaveddirty(pud_result); + else + pud_result = pud_clear_saveddirty(pud_result); + + return pud_result; } -/* mprotect needs to preserve PAT bits when updating vm_page_prot */ +/* + * mprotect needs to preserve PAT and encryption bits when updating + * vm_page_prot + */ #define pgprot_modify pgprot_modify static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) { pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK; - pgprotval_t addbits = pgprot_val(newprot); + pgprotval_t addbits = pgprot_val(newprot) & ~_PAGE_CHG_MASK; return __pgprot(preservebits | addbits); } @@ -623,11 +873,6 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define canon_pgprot(p) __pgprot(massage_pgprot(p)) -static inline pgprot_t arch_filter_pgprot(pgprot_t prot) -{ - return canon_pgprot(prot); -} - static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, enum page_cache_mode pcm, enum page_cache_mode new_pcm) @@ -663,7 +908,7 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, pmd_t *populate_extra_pmd(unsigned long vaddr); pte_t *populate_extra_pte(unsigned long vaddr); -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd); /* @@ -677,14 +922,14 @@ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) return pgd; return __pti_set_user_pgtbl(pgdp, pgd); } -#else /* CONFIG_PAGE_TABLE_ISOLATION */ +#else /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) { return pgd; } -#endif /* CONFIG_PAGE_TABLE_ISOLATION */ +#endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #ifdef CONFIG_X86_32 @@ -693,7 +938,7 @@ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) # include <asm/pgtable_64.h> #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/mm_types.h> #include <linux/mmdebug.h> #include <linux/log2.h> @@ -710,17 +955,18 @@ static inline int pte_same(pte_t a, pte_t b) return a.pte == b.pte; } -static inline int pte_present(pte_t a) +static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr) { - return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); + if (__pte_needs_invert(pte_val(pte))) + return __pte(pte_val(pte) - (nr << PFN_PTE_SHIFT)); + return __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT)); } +#define pte_advance_pfn pte_advance_pfn -#ifdef __HAVE_ARCH_PTE_DEVMAP -static inline int pte_devmap(pte_t a) +static inline int pte_present(pte_t a) { - return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP; + return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); } -#endif #define pte_accessible pte_accessible static inline bool pte_accessible(struct mm_struct *mm, pte_t a) @@ -729,7 +975,7 @@ static inline bool pte_accessible(struct mm_struct *mm, pte_t a) return true; if ((pte_flags(a) & _PAGE_PROTNONE) && - 
mm_tlb_flush_pending(mm)) + atomic_read(&mm->tlb_flush_pending)) return true; return false; @@ -749,7 +995,7 @@ static inline int pmd_present(pmd_t pmd) #ifdef CONFIG_NUMA_BALANCING /* * These work without NUMA balancing but the kernel does not care. See the - * comment in include/asm-generic/pgtable.h + * comment in include/linux/pgtable.h */ static inline int pte_protnone(pte_t pte) { @@ -783,45 +1029,10 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) */ #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) -/* - * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] - * - * this macro returns the index of the entry in the pmd page which would - * control the given virtual address - */ -static inline unsigned long pmd_index(unsigned long address) -{ - return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); -} - -/* - * Conversion functions: convert a page and protection to a page entry, - * and a page entry and page directory to the page they refer to. - * - * (Currently stuck as a macro because of indirect forward reference - * to linux/mm.h:page_to_nid()) - */ -#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) - -/* - * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] - * - * this function returns the index of the entry in the pte page which would - * control the given virtual address - */ -static inline unsigned long pte_index(unsigned long address) -{ - return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); -} - -static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) -{ - return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); -} - static inline int pmd_bad(pmd_t pmd) { - return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; + return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) != + (_KERNPG_TABLE & ~_PAGE_ACCESSED); } static inline unsigned long pages_to_mb(unsigned long npg) @@ -840,9 +1051,9 @@ static inline int pud_present(pud_t pud) return pud_flags(pud) & _PAGE_PRESENT; } -static inline unsigned long pud_page_vaddr(pud_t pud) +static inline pmd_t *pud_pgtable(pud_t pud) { - return (unsigned long)__va(pud_val(pud) & pud_pfn_mask(pud)); + return (pmd_t *)__va(pud_val(pud) & pud_pfn_mask(pud)); } /* @@ -851,34 +1062,18 @@ static inline unsigned long pud_page_vaddr(pud_t pud) */ #define pud_page(pud) pfn_to_page(pud_pfn(pud)) -/* Find an entry in the second-level page table.. 
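
The relaxed pmd_bad() above masks _PAGE_ACCESSED out of both sides of the comparison, so a page-table entry whose Accessed bit has been set by hardware (or cleared again by aging code) is not misreported as corrupt. A simplified numeric check of that identity (the kernel's _KERNPG_TABLE can carry extra bits, e.g. the SME encryption mask, which cancel out the same way):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define PRESENT		(1ULL << 0)
#define RW		(1ULL << 1)
#define USER		(1ULL << 2)
#define ACCESSED	(1ULL << 5)
#define DIRTY		(1ULL << 6)
#define PSE		(1ULL << 7)

#define KERNPG_TABLE	(PRESENT | RW | ACCESSED | DIRTY)

/* Model of the relaxed pmd_bad(): USER and ACCESSED may vary freely. */
static bool pmd_bad(uint64_t flags)
{
	return (flags & ~(USER | ACCESSED)) != (KERNPG_TABLE & ~ACCESSED);
}

int main(void)
{
	assert(!pmd_bad(KERNPG_TABLE));			/* Accessed set   */
	assert(!pmd_bad(KERNPG_TABLE & ~ACCESSED));	/* Accessed clear */
	assert(!pmd_bad(KERNPG_TABLE | USER));		/* user table     */
	assert(pmd_bad(KERNPG_TABLE | PSE));		/* huge-page bit  */
	return 0;
}
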
*/ -static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) -{ - return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); -} - -static inline int pud_large(pud_t pud) +#define pud_leaf pud_leaf +static inline bool pud_leaf(pud_t pud) { - return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == - (_PAGE_PSE | _PAGE_PRESENT); + return pud_val(pud) & _PAGE_PSE; } static inline int pud_bad(pud_t pud) { return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; } -#else -static inline int pud_large(pud_t pud) -{ - return 0; -} #endif /* CONFIG_PGTABLE_LEVELS > 2 */ -static inline unsigned long pud_index(unsigned long address) -{ - return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); -} - #if CONFIG_PGTABLE_LEVELS > 3 static inline int p4d_none(p4d_t p4d) { @@ -890,9 +1085,9 @@ static inline int p4d_present(p4d_t p4d) return p4d_flags(p4d) & _PAGE_PRESENT; } -static inline unsigned long p4d_page_vaddr(p4d_t p4d) +static inline pud_t *p4d_pgtable(p4d_t p4d) { - return (unsigned long)__va(p4d_val(p4d) & p4d_pfn_mask(p4d)); + return (pud_t *)__va(p4d_val(p4d) & p4d_pfn_mask(p4d)); } /* @@ -901,17 +1096,11 @@ static inline unsigned long p4d_page_vaddr(p4d_t p4d) */ #define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d)) -/* Find an entry in the third-level page table.. */ -static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) -{ - return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address); -} - static inline int p4d_bad(p4d_t p4d) { unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER; - if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + if (IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) ignore_flags |= _PAGE_NX; return (p4d_flags(p4d) & ~ignore_flags) != 0; @@ -957,7 +1146,7 @@ static inline int pgd_bad(pgd_t pgd) if (!pgtable_l5_enabled()) return 0; - if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + if (IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) ignore_flags |= _PAGE_NX; return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; @@ -977,57 +1166,22 @@ static inline int pgd_none(pgd_t pgd) } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ -#endif /* __ASSEMBLY__ */ - -/* - * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] - * - * this macro returns the index of the entry in the pgd page which would - * control the given virtual address - */ -#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) - -/* - * pgd_offset() returns a (pgd_t *) - * pgd_index() is used get the offset into the pgd page's array of pgd_t's; - */ -#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address))) -/* - * a shortcut to get a pgd_t in a given mm - */ -#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address)) -/* - * a shortcut which implies the use of the kernel's pgd, instead - * of a process's - */ -#define pgd_offset_k(address) pgd_offset(&init_mm, (address)) - +#endif /* __ASSEMBLER__ */ #define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern int direct_gbpages; void init_mem_mapping(void); void early_alloc_pgt_buf(void); -extern void memblock_find_dma_reserve(void); +void __init poking_init(void); +unsigned long init_memory_mapping(unsigned long start, + unsigned long end, pgprot_t prot); #ifdef CONFIG_X86_64 -/* Realmode trampoline initialization. 
*/ extern pgd_t trampoline_pgd_entry; -static inline void __meminit init_trampoline_default(void) -{ - /* Default trampoline pgd value */ - trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)]; -} -# ifdef CONFIG_RANDOMIZE_MEMORY -void __meminit init_trampoline(void); -# else -# define init_trampoline init_trampoline_default -# endif -#else -static inline void init_trampoline(void) { } #endif /* local pte updates need not use xchg for locking */ @@ -1056,21 +1210,17 @@ static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp) return res; } -static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep , pte_t pte) -{ - native_set_pte(ptep, pte); -} - static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { - native_set_pmd(pmdp, pmd); + page_table_check_pmd_set(mm, pmdp, pmd); + set_pmd(pmdp, pmd); } static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, pud_t *pudp, pud_t pud) { + page_table_check_pud_set(mm, pudp, pud); native_set_pud(pudp, pud); } @@ -1101,6 +1251,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t pte = native_ptep_get_and_clear(ptep); + page_table_check_pte_clear(mm, pte); return pte; } @@ -1116,6 +1267,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, * care about updates and native needs no locking */ pte = native_local_ptep_get_and_clear(ptep); + page_table_check_pte_clear(mm, pte); } else { pte = ptep_get_and_clear(mm, addr, ptep); } @@ -1126,12 +1278,20 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte); -} + /* + * Avoid accidentally creating shadow stack PTEs + * (Write=0,Dirty=1). Use cmpxchg() to prevent races with + * the hardware setting Dirty=1. + */ + pte_t old_pte, new_pte; -#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0) + old_pte = READ_ONCE(*ptep); + do { + new_pte = pte_wrprotect(old_pte); + } while (!try_cmpxchg((long *)&ptep->pte, (long *)&old_pte, *(long *)&new_pte)); +} -#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) +#define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0) #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS extern int pmdp_set_access_flags(struct vm_area_struct *vma, @@ -1152,37 +1312,43 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); -#define pmd_write pmd_write -static inline int pmd_write(pmd_t pmd) -{ - return pmd_flags(pmd) & _PAGE_RW; -} - #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - return native_pmdp_get_and_clear(pmdp); + pmd_t pmd = native_pmdp_get_and_clear(pmdp); + + page_table_check_pmd_clear(mm, pmd); + + return pmd; } #define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pud_t *pudp) { - return native_pudp_get_and_clear(pudp); + pud_t pud = native_pudp_get_and_clear(pudp); + + page_table_check_pud_clear(mm, pud); + + return pud; } #define __HAVE_ARCH_PMDP_SET_WRPROTECT static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp); -} + /* + * Avoid accidentally creating shadow stack PTEs + * (Write=0,Dirty=1). 
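
The try_cmpxchg() loop in the new ptep_set_wrprotect() closes the window where clearing only _PAGE_RW, while the CPU concurrently sets Dirty=1, would momentarily produce the Write=0,Dirty=1 shadow-stack encoding. A userspace model of the same retry pattern using C11 atomics (bit numbers as before; a sketch of the idiom, not kernel code):

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

#define RW	(1ULL << 1)
#define DIRTY	(1ULL << 6)
#define SAVED	(1ULL << 58)

static uint64_t wrprotect(uint64_t v)	/* model of pte_wrprotect() */
{
	v &= ~RW;
	if (v & DIRTY) {		/* park Dirty in SavedDirty */
		v &= ~DIRTY;
		v |= SAVED;
	}
	return v;
}

/* Model of ptep_set_wrprotect(): never lose a concurrent Dirty=1. */
static void set_wrprotect(_Atomic uint64_t *pte)
{
	uint64_t old = atomic_load(pte);

	while (!atomic_compare_exchange_weak(pte, &old, wrprotect(old)))
		;	/* 'old' was refreshed by the failed CAS; retry */
}

int main(void)
{
	_Atomic uint64_t pte = RW | DIRTY;

	set_wrprotect(&pte);
	assert(atomic_load(&pte) == SAVED);	/* Write=0, Dirty moved */
	return 0;
}
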
Use cmpxchg() to prevent races with + * the hardware setting Dirty=1. + */ + pmd_t old_pmd, new_pmd; -#define pud_write pud_write -static inline int pud_write(pud_t pud) -{ - return pud_flags(pud) & _PAGE_RW; + old_pmd = READ_ONCE(*pmdp); + do { + new_pmd = pmd_wrprotect(old_pmd); + } while (!try_cmpxchg((long *)pmdp, (long *)&old_pmd, *(long *)&new_pmd)); } #ifndef pmdp_establish @@ -1190,6 +1356,7 @@ static inline int pud_write(pud_t pud) static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { + page_table_check_pmd_set(vma->vm_mm, pmdp, pmd); if (IS_ENABLED(CONFIG_SMP)) { return xchg(pmdp, pmd); } else { @@ -1199,6 +1366,29 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, } } #endif + +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +static inline pud_t pudp_establish(struct vm_area_struct *vma, + unsigned long address, pud_t *pudp, pud_t pud) +{ + page_table_check_pud_set(vma->vm_mm, pudp, pud); + if (IS_ENABLED(CONFIG_SMP)) { + return xchg(pudp, pud); + } else { + pud_t old = *pudp; + WRITE_ONCE(*pudp, pud); + return old; + } +} +#endif + +#define __HAVE_ARCH_PMDP_INVALIDATE_AD +extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); + +pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address, + pud_t *pudp); + /* * Page table pages are page-aligned. The lower half of the top * level is used for userspace and the top half for the kernel. @@ -1213,11 +1403,9 @@ static inline bool pgdp_maps_userspace(void *__ptr) return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START); } -static inline int pgd_large(pgd_t pgd) { return 0; } - -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* - * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages + * All top-level MITIGATION_PAGE_TABLE_ISOLATION page tables are order-1 pages * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and * the user one is in the last 4k. To switch between them, you * just need to flip the 12th bit in their addresses. @@ -1262,12 +1450,12 @@ static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) { return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); } -#endif /* CONFIG_PAGE_TABLE_ISOLATION */ +#endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); * - * dst - pointer to pgd range anwhere on a pgd page + * dst - pointer to pgd range anywhere on a pgd page * src - "" * count - the number of pgds to copy. 
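
The PTI pgd pair described above is a single order-1 (8k) allocation: the kernel half occupies the first 4k and the user half the second, so switching between them really is just flipping bit 12 of the pointer. A tiny model (the sample address is made up; only the 8k alignment matters):

#include <assert.h>
#include <stdint.h>

#define PTI_PGTABLE_SWITCH_BIT	12	/* PAGE_SHIFT */

static uint64_t to_user(uint64_t pgd)
{
	return pgd | (1ULL << PTI_PGTABLE_SWITCH_BIT);
}

static uint64_t to_kernel(uint64_t pgd)
{
	return pgd & ~(1ULL << PTI_PGTABLE_SWITCH_BIT);
}

int main(void)
{
	uint64_t kernel_pgd = 0xffff888000a0c000ULL;	/* sample, 8k-aligned */

	assert(to_user(kernel_pgd) == kernel_pgd + 0x1000);
	assert(to_kernel(to_user(kernel_pgd)) == kernel_pgd);
	return 0;
}
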
* @@ -1277,7 +1465,7 @@ static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) { memcpy(dst, src, count * sizeof(pgd_t)); -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION if (!static_cpu_has(X86_FEATURE_PTI)) return; /* Clone the user space pgd as well */ @@ -1308,6 +1496,11 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { } +static inline void update_mmu_cache_range(struct vm_fault *vmf, + struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ +} static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd) { @@ -1316,6 +1509,20 @@ static inline void update_mmu_cache_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud) { } +static inline pte_t pte_swp_mkexclusive(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SWP_EXCLUSIVE); +} + +static inline bool pte_swp_exclusive(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SWP_EXCLUSIVE; +} + +static inline pte_t pte_swp_clear_exclusive(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_SWP_EXCLUSIVE); +} #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline pte_t pte_swp_mksoft_dirty(pte_t pte) @@ -1351,26 +1558,38 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) #endif #endif -#define PKRU_AD_BIT 0x1 -#define PKRU_WD_BIT 0x2 -#define PKRU_BITS_PER_PKEY 2 +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline pte_t pte_swp_mkuffd_wp(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SWP_UFFD_WP); +} + +static inline int pte_swp_uffd_wp(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SWP_UFFD_WP; +} -static inline bool __pkru_allows_read(u32 pkru, u16 pkey) +static inline pte_t pte_swp_clear_uffd_wp(pte_t pte) { - int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; - return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); + return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP); } -static inline bool __pkru_allows_write(u32 pkru, u16 pkey) +static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd) { - int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; - /* - * Access-disable disables writes too so we need to check - * both bits here. - */ - return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits)); + return pmd_set_flags(pmd, _PAGE_SWP_UFFD_WP); } +static inline int pmd_swp_uffd_wp(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_SWP_UFFD_WP; +} + +static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_SWP_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + static inline u16 pte_flags_pkey(unsigned long pte_flags) { #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS @@ -1402,6 +1621,11 @@ static inline bool __pte_access_permitted(unsigned long pteval, bool write) { unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER; + /* + * Write=0,Dirty=1 PTEs are shadow stack, which the kernel + * shouldn't generally allow access to, but since they + * are already Write=0, the below logic covers both cases. 
+ */ if (write) need_pte_bits |= _PAGE_RW; @@ -1437,7 +1661,85 @@ static inline bool arch_has_pfn_modify_check(void) return boot_cpu_has_bug(X86_BUG_L1TF); } -#include <asm-generic/pgtable.h> -#endif /* __ASSEMBLY__ */ +#define arch_check_zapped_pte arch_check_zapped_pte +void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte); + +#define arch_check_zapped_pmd arch_check_zapped_pmd +void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd); + +#define arch_check_zapped_pud arch_check_zapped_pud +void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud); + +#ifdef CONFIG_XEN_PV +#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return !cpu_feature_enabled(X86_FEATURE_XENPV); +} +#endif + +#ifdef CONFIG_PAGE_TABLE_CHECK +static inline bool pte_user_accessible_page(pte_t pte) +{ + return (pte_val(pte) & _PAGE_PRESENT) && (pte_val(pte) & _PAGE_USER); +} + +static inline bool pmd_user_accessible_page(pmd_t pmd) +{ + return pmd_leaf(pmd) && (pmd_val(pmd) & _PAGE_PRESENT) && (pmd_val(pmd) & _PAGE_USER); +} + +static inline bool pud_user_accessible_page(pud_t pud) +{ + return pud_leaf(pud) && (pud_val(pud) & _PAGE_PRESENT) && (pud_val(pud) & _PAGE_USER); +} +#endif + +#ifdef CONFIG_X86_SGX +int arch_memory_failure(unsigned long pfn, int flags); +#define arch_memory_failure arch_memory_failure + +bool arch_is_platform_page(u64 paddr); +#define arch_is_platform_page arch_is_platform_page +#endif + +/* + * Use set_p*_safe(), and elide TLB flushing, when confident that *no* + * TLB flush will be required as a result of the "set". For example, use + * in scenarios where it is known ahead of time that the routine is + * setting non-present entries, or re-setting an existing entry to the + * same value. Otherwise, use the typical "set" helpers and flush the + * TLB. + */ +#define set_pte_safe(ptep, pte) \ +({ \ + WARN_ON_ONCE(pte_present(*ptep) && !pte_same(*ptep, pte)); \ + set_pte(ptep, pte); \ +}) + +#define set_pmd_safe(pmdp, pmd) \ +({ \ + WARN_ON_ONCE(pmd_present(*pmdp) && !pmd_same(*pmdp, pmd)); \ + set_pmd(pmdp, pmd); \ +}) + +#define set_pud_safe(pudp, pud) \ +({ \ + WARN_ON_ONCE(pud_present(*pudp) && !pud_same(*pudp, pud)); \ + set_pud(pudp, pud); \ +}) + +#define set_p4d_safe(p4dp, p4d) \ +({ \ + WARN_ON_ONCE(p4d_present(*p4dp) && !p4d_same(*p4dp, p4d)); \ + set_p4d(p4dp, p4d); \ +}) + +#define set_pgd_safe(pgdp, pgd) \ +({ \ + WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \ + set_pgd(pgdp, pgd); \ +}) +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PGTABLE_H */ diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 4fe9e7fc74d3..b612cc57a4d3 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -13,7 +13,7 @@ * This file contains the functions and defines necessary to modify and use * the i386 page table tree. 
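
A sketch of where the set_p*_safe() variants above are meant to be used: early code populating entries that are known to be non-present, where eliding the TLB flush is provably fine. The helper and its arguments here are hypothetical, shown only to illustrate the calling pattern:

/* Hypothetical early-boot helper: every slot being filled is known to
 * be non-present, so set_pte_safe() may skip TLB flushing and will
 * WARN (once) if that assumption is ever violated. */
static void __init map_one_page(pte_t *ptep, unsigned long paddr)
{
	set_pte_safe(ptep, __pte(paddr | pgprot_val(PAGE_KERNEL)));
}
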
*/ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/processor.h> #include <linux/threads.h> #include <asm/paravirt.h> @@ -29,68 +29,32 @@ extern pgd_t swapper_pg_dir[1024]; extern pgd_t initial_page_table[1024]; extern pmd_t initial_pg_pmd[]; -static inline void pgtable_cache_init(void) { } -static inline void check_pgt_cache(void) { } void paging_init(void); void sync_initial_page_table(void); -/* - * Define this if things work differently on an i386 and an i486: - * it will (on an i486) warn about kernel memory accesses that are - * done without a 'access_ok( ..)' - */ -#undef TEST_ACCESS_OK - #ifdef CONFIG_X86_PAE # include <asm/pgtable-3level.h> #else # include <asm/pgtable-2level.h> #endif -#if defined(CONFIG_HIGHPTE) -#define pte_offset_map(dir, address) \ - ((pte_t *)kmap_atomic(pmd_page(*(dir))) + \ - pte_index((address))) -#define pte_unmap(pte) kunmap_atomic((pte)) -#else -#define pte_offset_map(dir, address) \ - ((pte_t *)page_address(pmd_page(*(dir))) + pte_index((address))) -#define pte_unmap(pte) do { } while (0) -#endif - /* Clear a kernel PTE and flush it from the TLB */ #define kpte_clear_flush(ptep, vaddr) \ do { \ pte_clear(&init_mm, (vaddr), (ptep)); \ - __flush_tlb_one_kernel((vaddr)); \ + flush_tlb_one_kernel((vaddr)); \ } while (0) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* - * kern_addr_valid() is (1) for FLATMEM and (0) for - * SPARSEMEM and DISCONTIGMEM - */ -#ifdef CONFIG_FLATMEM -#define kern_addr_valid(addr) (1) -#else -#define kern_addr_valid(kaddr) (0) -#endif - -/* - * This is how much memory in addition to the memory covered up to - * and including _end we need mapped initially. - * We need: - * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE) - * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE) - * - * Modulo rounding, each megabyte assigned here requires a kilobyte of - * memory, which is currently unreclaimed. - * - * This should be a multiple of a page. + * This is used to calculate the .brk reservation for initial pagetables. + * Enough space is reserved to allocate pagetables sufficient to cover all + * of LOWMEM_PAGES, which is an upper bound on the size of the direct map of + * lowmem. * - * KERNEL_IMAGE_SIZE should be greater than pa(_end) - * and small than max_low_pfn, otherwise will waste some page table entries + * With PAE paging (PTRS_PER_PMD > 1), we allocate PTRS_PER_PGD == 4 pages for + * the PMD's in addition to the pages required for the last level pagetables. */ #if PTRS_PER_PMD > 1 #define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) @@ -106,6 +70,6 @@ do { \ * with only a host target support using a 32-bit type for internal * representation. */ -#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)) +#define LOWMEM_PAGES ((((_ULL(2)<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)) #endif /* _ASM_X86_PGTABLE_32_H */ diff --git a/arch/x86/include/asm/pgtable_32_areas.h b/arch/x86/include/asm/pgtable_32_areas.h new file mode 100644 index 000000000000..921148b42967 --- /dev/null +++ b/arch/x86/include/asm/pgtable_32_areas.h @@ -0,0 +1,53 @@ +#ifndef _ASM_X86_PGTABLE_32_AREAS_H +#define _ASM_X86_PGTABLE_32_AREAS_H + +#include <asm/cpu_entry_area.h> + +/* + * Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8MB value just means that there will be a 8MB "hole" after the + * physical memory until the kernel virtual memory starts. That means that + * any out-of-bounds memory accesses will hopefully be caught. 
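
A worked example of the .brk sizing above for the PAE case: mapping 1 GiB of lowmem takes 262144 PTEs, i.e. 512 page-table pages, plus the PTRS_PER_PGD == 4 PMD pages:

#include <assert.h>

/* PAE: each page-table page maps 512 pages; 4 PMD pages in total. */
#define PTRS_PER_PMD	512
#define PTRS_PER_PGD	4
#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)

int main(void)
{
	/* 1 GiB of lowmem = 262144 4k pages -> 512 + 4 reserved pages. */
	assert(PAGE_TABLE_SIZE(262144UL) == 516);
	return 0;
}
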
+ * The vmalloc() routines leaves a hole of 4kB between each vmalloced + * area for the same reason. ;) + */ +#define VMALLOC_OFFSET (8 * 1024 * 1024) + +#ifndef __ASSEMBLER__ +extern bool __vmalloc_start_set; /* set once high_memory is set */ +#endif + +#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) +#ifdef CONFIG_X86_PAE +#define LAST_PKMAP 512 +#else +#define LAST_PKMAP 1024 +#endif + +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE)) + +/* The +1 is for the readonly IDT page: */ +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK) + +#define LDT_BASE_ADDR \ + ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) + +#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE) + +#define PKMAP_BASE \ + ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK) + +#ifdef CONFIG_HIGHMEM +# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) +#else +# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE) +#endif + +#define MODULES_VADDR VMALLOC_START +#define MODULES_END VMALLOC_END +#define MODULES_LEN (MODULES_VADDR - MODULES_END) + +#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) + +#endif /* _ASM_X86_PGTABLE_32_AREAS_H */ diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index b0bc0fff5f1f..5356a46b0373 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_PGTABLE_32_DEFS_H -#define _ASM_X86_PGTABLE_32_DEFS_H +#ifndef _ASM_X86_PGTABLE_32_TYPES_H +#define _ASM_X86_PGTABLE_32_TYPES_H /* * The Linux x86 paging architecture is 'compile-time dual-mode', it @@ -20,54 +20,4 @@ #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) -/* Just any arbitrary offset to the start of the vmalloc VM area: the - * current 8MB value just means that there will be a 8MB "hole" after the - * physical memory until the kernel virtual memory starts. That means that - * any out-of-bounds memory accesses will hopefully be caught. - * The vmalloc() routines leaves a hole of 4kB between each vmalloced - * area for the same reason. 
;) - */ -#define VMALLOC_OFFSET (8 * 1024 * 1024) - -#ifndef __ASSEMBLY__ -extern bool __vmalloc_start_set; /* set once high_memory is set */ -#endif - -#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) -#ifdef CONFIG_X86_PAE -#define LAST_PKMAP 512 -#else -#define LAST_PKMAP 1024 -#endif - -/* - * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c - * to avoid include recursion hell - */ -#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) - -#define CPU_ENTRY_AREA_BASE \ - ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \ - & PMD_MASK) - -#define LDT_BASE_ADDR \ - ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) - -#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE) - -#define PKMAP_BASE \ - ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK) - -#ifdef CONFIG_HIGHMEM -# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) -#else -# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE) -#endif - -#define MODULES_VADDR VMALLOC_START -#define MODULES_END VMALLOC_END -#define MODULES_LEN (MODULES_VADDR - MODULES_END) - -#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) - -#endif /* _ASM_X86_PGTABLE_32_DEFS_H */ +#endif /* _ASM_X86_PGTABLE_32_TYPES_H */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 9c85b54bf03c..f06e5d6a2747 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -5,7 +5,7 @@ #include <linux/const.h> #include <asm/pgtable_64_types.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This file contains the functions and defines necessary to modify and use @@ -41,11 +41,9 @@ static inline void sync_initial_page_table(void) { } pr_err("%s:%d: bad pud %p(%016lx)\n", \ __FILE__, __LINE__, &(e), pud_val(e)) -#if CONFIG_PGTABLE_LEVELS >= 5 #define p4d_ERROR(e) \ pr_err("%s:%d: bad p4d %p(%016lx)\n", \ __FILE__, __LINE__, &(e), p4d_val(e)) -#endif #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %p(%016lx)\n", \ @@ -53,6 +51,12 @@ static inline void sync_initial_page_table(void) { } struct mm_struct; +#define mm_p4d_folded mm_p4d_folded +static inline bool mm_p4d_folded(struct mm_struct *mm) +{ + return !pgtable_l5_enabled(); +} + void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte); void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); @@ -137,7 +141,8 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { pgd_t pgd; - if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) { + if (pgtable_l5_enabled() || + !IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) { WRITE_ONCE(*p4dp, p4d); return; } @@ -162,34 +167,25 @@ static inline void native_pgd_clear(pgd_t *pgd) native_set_pgd(pgd, native_make_pgd(0)); } -extern void sync_global_pgds(unsigned long start, unsigned long end); - /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. */ -/* - * Level 4 access. - */ -#define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE) - -/* PUD - Level3 access */ +/* PGD - Level 4 access */ -/* PMD - Level 2 access */ +/* PUD - Level 3 access */ -/* PTE - Level 1 access. */ +/* PMD - Level 2 access */ -/* x86-64 always has all page tables mapped. */ -#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) -#define pte_unmap(pte) ((void)(pte))/* NOP */ +/* PTE - Level 1 access */ /* * Encode and de-code a swap entry * * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number * | ... 
|SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names - * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry + * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| E|F|SD|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above @@ -197,9 +193,17 @@ extern void sync_global_pgds(unsigned long start, unsigned long end); * erratum where they can be incorrectly set by hardware on * non-present PTEs. * + * SD Bits 1-4 are not used in non-present format and available for + * special use described below: + * * SD (1) in swp entry is used to store soft dirty bit, which helps us * remember soft dirty over page migration * + * F (2) in swp entry is used to record when a pagetable is + * writeprotected by userfaultfd WP support. + * + * E (3) in swp entry is used to remember PG_anon_exclusive. + * * Bit 7 in swp entry should be 0 because pmd_present checks not only P, * but also L and G. * @@ -232,18 +236,14 @@ extern void sync_global_pgds(unsigned long start, unsigned long end); #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val((pmd)) }) -#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) -#define __swp_entry_to_pmd(x) ((pmd_t) { .pmd = (x).val }) +#define __swp_entry_to_pte(x) (__pte((x).val)) +#define __swp_entry_to_pmd(x) (__pmd((x).val)) -extern int kern_addr_valid(unsigned long addr); extern void cleanup_highmap(void); #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN -#define pgtable_cache_init() do { } while (0) -#define check_pgt_cache() do { } while (0) - #define PAGE_AGP PAGE_KERNEL_NOCACHE #define HAVE_PAGE_AGP 1 @@ -259,15 +259,8 @@ extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); #define gup_fast_permitted gup_fast_permitted -static inline bool gup_fast_permitted(unsigned long start, int nr_pages, - int write) +static inline bool gup_fast_permitted(unsigned long start, unsigned long end) { - unsigned long len, end; - - len = (unsigned long)nr_pages << PAGE_SHIFT; - end = start + len; - if (end < start) - return false; if (end >> __VIRTUAL_MASK_SHIFT) return false; return true; @@ -275,5 +268,26 @@ static inline bool gup_fast_permitted(unsigned long start, int nr_pages, #include <asm/pgtable-invert.h> -#endif /* !__ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ + +#define l4_index(x) (((x) >> 39) & 511) +#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) + +L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4) +L4_START_KERNEL = l4_index(__START_KERNEL_map) + +L3_START_KERNEL = pud_index(__START_KERNEL_map) + +#define SYM_DATA_START_PAGE_ALIGNED(name) \ + SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE) + +/* Automate the creation of 1 to 1 mapping pmd entries */ +#define PMDS(START, PERM, COUNT) \ + i = 0 ; \ + .rept (COUNT) ; \ + .quad (START) + (i << PMD_SHIFT) + (PERM) ; \ + i = i + 1 ; \ + .endr + +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PGTABLE_64_H */ diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 88bca456da99..7eb61ef6a185 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -4,7 +4,7 @@ #include <asm/sparsemem.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <asm/kaslr.h> @@ -19,8 +19,8 @@ typedef unsigned long pgdval_t; typedef unsigned long pgprotval_t; 
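
A runnable model of the 64-bit layout documented above: TYPE occupies the top five bits (59-63) and OFFSET is stored inverted in bits 9-58, so a swap PTE can never alias a valid cached physical address (the L1TF mitigation). Illustrative only; the kernel's real macros also fold in the swp_entry_t conversions:

#include <assert.h>
#include <stdint.h>

#define SWP_TYPE_BITS		5
#define SWP_OFFSET_BITS		50	/* bits 9..58 */
#define SWP_OFFSET_SHIFT	9
#define SWP_OFFSET_MASK		((1ULL << SWP_OFFSET_BITS) - 1)

static uint64_t mk_swp_pte(uint64_t type, uint64_t offset)
{
	return (type << (64 - SWP_TYPE_BITS)) |
	       ((~offset & SWP_OFFSET_MASK) << SWP_OFFSET_SHIFT);
}

int main(void)
{
	uint64_t pte = mk_swp_pte(2, 0x1234);
	uint64_t off = (pte >> SWP_OFFSET_SHIFT) & SWP_OFFSET_MASK;

	assert(pte >> (64 - SWP_TYPE_BITS) == 2);	/* type decodes   */
	assert((~off & SWP_OFFSET_MASK) == 0x1234);	/* offset decodes */
	assert((pte & 1) == 0);				/* not present    */
	return 0;
}
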
typedef struct { pteval_t pte; } pte_t; +typedef struct { pmdval_t pmd; } pmd_t; -#ifdef CONFIG_X86_5LEVEL extern unsigned int __pgtable_l5_enabled; #ifdef USE_EARLY_PGTABLE_L5 @@ -36,18 +36,13 @@ static inline bool pgtable_l5_enabled(void) #define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57) #endif /* USE_EARLY_PGTABLE_L5 */ -#else -#define pgtable_l5_enabled() 0 -#endif /* CONFIG_X86_5LEVEL */ +#define ARCH_PAGE_TABLE_SYNC_MASK \ + (pgtable_l5_enabled() ? PGTBL_PGD_MODIFIED : PGTBL_P4D_MODIFIED) extern unsigned int pgdir_shift; extern unsigned int ptrs_per_p4d; -#endif /* !__ASSEMBLY__ */ - -#define SHARED_KERNEL_PMD 0 - -#ifdef CONFIG_X86_5LEVEL +#endif /* !__ASSEMBLER__ */ /* * PGDIR_SHIFT determines what a top-level page table entry can map @@ -66,17 +61,6 @@ extern unsigned int ptrs_per_p4d; #define MAX_POSSIBLE_PHYSMEM_BITS 52 -#else /* CONFIG_X86_5LEVEL */ - -/* - * PGDIR_SHIFT determines what a top-level page table entry can map - */ -#define PGDIR_SHIFT 39 -#define PTRS_PER_PGD 512 -#define MAX_PTRS_PER_P4D 1 - -#endif /* CONFIG_X86_5LEVEL */ - /* * 3rd level page */ @@ -103,7 +87,7 @@ extern unsigned int ptrs_per_p4d; #define PGDIR_MASK (~(PGDIR_SIZE - 1)) /* - * See Documentation/x86/x86_64/mm.txt for a description of the memory map. + * See Documentation/arch/x86/x86_64/mm.rst for a description of the memory map. * * Be very careful vs. KASLR when changing anything here. The KASLR address * range must not overlap with anything except the KASAN shadow area, which @@ -129,21 +113,68 @@ extern unsigned int ptrs_per_p4d; #define __VMEMMAP_BASE_L4 0xffffea0000000000UL #define __VMEMMAP_BASE_L5 0xffd4000000000000UL -#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT # define VMALLOC_START vmalloc_base # define VMALLOC_SIZE_TB (pgtable_l5_enabled() ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4) # define VMEMMAP_START vmemmap_base + +#ifdef CONFIG_RANDOMIZE_MEMORY +# define DIRECT_MAP_PHYSMEM_END direct_map_physmem_end +#endif + +/* + * End of the region for which vmalloc page tables are pre-allocated. + * For non-KMSAN builds, this is the same as VMALLOC_END. + * For KMSAN builds, VMALLOC_START..VMEMORY_END is 4 times bigger than + * VMALLOC_START..VMALLOC_END (see below). + */ +#define VMEMORY_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1) + +#ifndef CONFIG_KMSAN +#define VMALLOC_END VMEMORY_END #else -# define VMALLOC_START __VMALLOC_BASE_L4 -# define VMALLOC_SIZE_TB VMALLOC_SIZE_TB_L4 -# define VMEMMAP_START __VMEMMAP_BASE_L4 -#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ +/* + * In KMSAN builds vmalloc area is four times smaller, and the remaining 3/4 + * are used to keep the metadata for virtual pages. The memory formerly + * belonging to vmalloc area is now laid out as follows: + * + * 1st quarter: VMALLOC_START to VMALLOC_END - new vmalloc area + * 2nd quarter: KMSAN_VMALLOC_SHADOW_START to + * VMALLOC_END+KMSAN_VMALLOC_SHADOW_OFFSET - vmalloc area shadow + * 3rd quarter: KMSAN_VMALLOC_ORIGIN_START to + * VMALLOC_END+KMSAN_VMALLOC_ORIGIN_OFFSET - vmalloc area origins + * 4th quarter: KMSAN_MODULES_SHADOW_START to KMSAN_MODULES_ORIGIN_START + * - shadow for modules, + * KMSAN_MODULES_ORIGIN_START to + * KMSAN_MODULES_ORIGIN_START + MODULES_LEN - origins for modules. + */ +#define VMALLOC_QUARTER_SIZE ((VMALLOC_SIZE_TB << 40) >> 2) +#define VMALLOC_END (VMALLOC_START + VMALLOC_QUARTER_SIZE - 1) -#define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1) +/* + * vmalloc metadata addresses are calculated by adding shadow/origin offsets + * to vmalloc address. 
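
The KMSAN quartering above in numbers, for the 4-level layout (VMALLOC_SIZE_TB = 32; the base value is the usual __VMALLOC_BASE_L4, quoted here from memory purely for illustration):

#include <assert.h>
#include <stdint.h>

#define TB		(1ULL << 40)
#define VMALLOC_START	0xffffc90000000000ULL	/* __VMALLOC_BASE_L4 */
#define VMALLOC_SIZE	(32 * TB)
#define QUARTER		(VMALLOC_SIZE >> 2)	/* 8 TB per quarter */

int main(void)
{
	uint64_t vmalloc_end  = VMALLOC_START + QUARTER - 1;
	uint64_t shadow_start = VMALLOC_START + QUARTER;	/* 2nd quarter */
	uint64_t origin_start = VMALLOC_START + 2 * QUARTER;	/* 3rd quarter */

	assert(shadow_start == vmalloc_end + 1);
	assert(origin_start - shadow_start == QUARTER);
	return 0;
}
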
+ */ +#define KMSAN_VMALLOC_SHADOW_OFFSET VMALLOC_QUARTER_SIZE +#define KMSAN_VMALLOC_ORIGIN_OFFSET (VMALLOC_QUARTER_SIZE << 1) + +#define KMSAN_VMALLOC_SHADOW_START (VMALLOC_START + KMSAN_VMALLOC_SHADOW_OFFSET) +#define KMSAN_VMALLOC_ORIGIN_START (VMALLOC_START + KMSAN_VMALLOC_ORIGIN_OFFSET) + +/* + * The shadow and origin areas for modules are placed one after another in + * the last 1/4 of vmalloc space. + */ +#define KMSAN_MODULES_SHADOW_START (VMALLOC_END + KMSAN_VMALLOC_ORIGIN_OFFSET + 1) +#define KMSAN_MODULES_ORIGIN_START (KMSAN_MODULES_SHADOW_START + MODULES_LEN) +#endif /* CONFIG_KMSAN */ #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) /* The module section ends at the start of the fixmap */ -#define MODULES_END _AC(0xffffffffff000000, UL) +#ifndef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP +# define MODULES_END _AC(0xffffffffff000000, UL) +#else +# define MODULES_END _AC(0xfffffffffe000000, UL) +#endif #define MODULES_LEN (MODULES_END - MODULES_VADDR) #define ESPFIX_PGD_ENTRY _AC(-2, UL) @@ -159,4 +190,9 @@ extern unsigned int ptrs_per_p4d; #define PGD_KERNEL_START ((PAGE_SIZE / 2) / sizeof(pgd_t)) +/* + * We borrow bit 3 to remember PG_anon_exclusive. + */ +#define _PAGE_SWP_EXCLUSIVE _PAGE_PWT + #endif /* _ASM_X86_PGTABLE_64_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h new file mode 100644 index 000000000000..4f056fb88174 --- /dev/null +++ b/arch/x86/include/asm/pgtable_areas.h @@ -0,0 +1,22 @@ +#ifndef _ASM_X86_PGTABLE_AREAS_H +#define _ASM_X86_PGTABLE_AREAS_H + +#ifdef CONFIG_X86_32 +# include <asm/pgtable_32_areas.h> +#endif + +/* Single page reserved for the readonly IDT mapping: */ +#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE +#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) + +#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) + +#ifdef CONFIG_X86_32 +#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + \ + (CPU_ENTRY_AREA_SIZE * NR_CPUS) - \ + CPU_ENTRY_AREA_BASE) +#else +#define CPU_ENTRY_AREA_MAP_SIZE P4D_SIZE +#endif + +#endif /* _ASM_X86_PGTABLE_AREAS_H */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index d6ff0bbdb394..2ec250ba467e 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -7,8 +7,6 @@ #include <asm/page_types.h> -#define FIRST_USER_ADDRESS 0UL - #define _PAGE_BIT_PRESENT 0 /* is present */ #define _PAGE_BIT_RW 1 /* writeable */ #define _PAGE_BIT_USER 2 /* userspace addressable */ @@ -23,7 +21,8 @@ #define _PAGE_BIT_SOFTW2 10 /* " */ #define _PAGE_BIT_SOFTW3 11 /* " */ #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ -#define _PAGE_BIT_SOFTW4 58 /* available for programmer */ +#define _PAGE_BIT_SOFTW4 57 /* available for programmer */ +#define _PAGE_BIT_SOFTW5 58 /* available for programmer */ #define _PAGE_BIT_PKEY_BIT0 59 /* Protection Keys, bit 1/4 */ #define _PAGE_BIT_PKEY_BIT1 60 /* Protection Keys, bit 2/4 */ #define _PAGE_BIT_PKEY_BIT2 61 /* Protection Keys, bit 3/4 */ @@ -32,8 +31,18 @@ #define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1 #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 +#define _PAGE_BIT_UFFD_WP _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */ #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ -#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 +#define _PAGE_BIT_KERNEL_4K _PAGE_BIT_SOFTW3 /* page must not be converted to large */ + +#ifdef CONFIG_X86_64 +#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit (leaf) */ +#define
_PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW5 /* No PTI shadow (root PGD) */ +#else +/* Shared with _PAGE_BIT_UFFD_WP which is not supported on 32 bit */ +#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit (leaf) */ +#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW2 /* No PTI shadow (root PGD) */ +#endif /* If _PAGE_BIT_PRESENT is clear, we use these: */ /* - if the user mapped it with PROT_NONE; pte_present gives true */ @@ -55,6 +64,7 @@ #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) +#define _PAGE_KERNEL_4K (_AT(pteval_t, 1) << _PAGE_BIT_KERNEL_4K) #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS #define _PAGE_PKEY_BIT0 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT0) #define _PAGE_PKEY_BIT1 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT1) @@ -100,21 +110,37 @@ #define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0)) #endif +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +#define _PAGE_UFFD_WP (_AT(pteval_t, 1) << _PAGE_BIT_UFFD_WP) +#define _PAGE_SWP_UFFD_WP _PAGE_USER +#else +#define _PAGE_UFFD_WP (_AT(pteval_t, 0)) +#define _PAGE_SWP_UFFD_WP (_AT(pteval_t, 0)) +#endif + #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) -#define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP) -#define __HAVE_ARCH_PTE_DEVMAP +#define _PAGE_SOFTW4 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW4) #else #define _PAGE_NX (_AT(pteval_t, 0)) -#define _PAGE_DEVMAP (_AT(pteval_t, 0)) +#define _PAGE_SOFTW4 (_AT(pteval_t, 0)) #endif +/* + * The hardware requires shadow stack to be Write=0,Dirty=1. However, + * there are valid cases where the kernel might create read-only PTEs that + * are dirty (e.g., fork(), mprotect(), uffd-wp(), soft-dirty tracking). In + * this case, the _PAGE_SAVED_DIRTY bit is used instead of the HW-dirty bit, + * to avoid creating wrong "shadow stack" PTEs. Such PTEs have + * (Write=0,SavedDirty=1,Dirty=0) set. + */ +#define _PAGE_SAVED_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SAVED_DIRTY) + +#define _PAGE_DIRTY_BITS (_PAGE_DIRTY | _PAGE_SAVED_DIRTY) + #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) -#define _PAGE_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\ - _PAGE_ACCESSED | _PAGE_DIRTY) -#define _KERNPG_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | \ - _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_NOPTISHADOW (_AT(pteval_t, 1) << _PAGE_BIT_NOPTISHADOW) /* * Set of bits not changed in pte_modify. The pte's @@ -122,10 +148,12 @@ * instance, and is *not* included in this mask since * pte_modify() does modify it. */ -#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ - _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) -#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) +#define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ + _PAGE_SPECIAL | _PAGE_ACCESSED | \ + _PAGE_DIRTY_BITS | _PAGE_SOFT_DIRTY | \ + _PAGE_CC | _PAGE_UFFD_WP) +#define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT) +#define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE) /* * The cache modes defined here are used to translate between pure SW usage @@ -135,103 +163,102 @@ * to have the WB mode at index 0 (all bits clear). This is the default * right now and likely would break too much if changed.
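 * For instance, assuming the PAT layout the kernel normally programs at
 * boot, _PAGE_CACHE_MODE_WC translates to PAT index 1 (only _PAGE_PWT
 * set), while _PAGE_CACHE_MODE_WB must stay all bits clear as required
 * above.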
*/ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ enum page_cache_mode { - _PAGE_CACHE_MODE_WB = 0, - _PAGE_CACHE_MODE_WC = 1, + _PAGE_CACHE_MODE_WB = 0, + _PAGE_CACHE_MODE_WC = 1, _PAGE_CACHE_MODE_UC_MINUS = 2, - _PAGE_CACHE_MODE_UC = 3, - _PAGE_CACHE_MODE_WT = 4, - _PAGE_CACHE_MODE_WP = 5, - _PAGE_CACHE_MODE_NUM = 8 + _PAGE_CACHE_MODE_UC = 3, + _PAGE_CACHE_MODE_WT = 4, + _PAGE_CACHE_MODE_WP = 5, + + _PAGE_CACHE_MODE_NUM = 8 }; #endif -#define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT) +#define _PAGE_CC (_AT(pteval_t, cc_get_mask())) +#define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) + +#define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT) +#define _PAGE_LARGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT_LARGE) + #define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) #define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP)) -#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) - -#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \ - _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED) -#define PAGE_COPY PAGE_COPY_NOEXEC -#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED) - -#define __PAGE_KERNEL_EXEC \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL) -#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) - -#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) -#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) -#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE) -#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) -#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) -#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) -#define __PAGE_KERNEL_WP (__PAGE_KERNEL | _PAGE_CACHE_WP) - -#define __PAGE_KERNEL_IO (__PAGE_KERNEL) -#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE) - -#ifndef __ASSEMBLY__ - -#define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) - -#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ - _PAGE_DIRTY | _PAGE_ENC) -#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER) - -#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC) -#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC) - -#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL) -#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP) - -#define default_pgprot(x) __pgprot((x) & __default_kernel_pte_mask) - -#define PAGE_KERNEL default_pgprot(__PAGE_KERNEL | _PAGE_ENC) -#define PAGE_KERNEL_NOENC default_pgprot(__PAGE_KERNEL) -#define PAGE_KERNEL_RO default_pgprot(__PAGE_KERNEL_RO | _PAGE_ENC) -#define PAGE_KERNEL_EXEC default_pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_EXEC_NOENC default_pgprot(__PAGE_KERNEL_EXEC) -#define PAGE_KERNEL_RX default_pgprot(__PAGE_KERNEL_RX | _PAGE_ENC) -#define PAGE_KERNEL_NOCACHE default_pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC) -#define PAGE_KERNEL_LARGE default_pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC) -#define PAGE_KERNEL_LARGE_EXEC default_pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_VVAR default_pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC) - -#define PAGE_KERNEL_IO default_pgprot(__PAGE_KERNEL_IO) -#define PAGE_KERNEL_IO_NOCACHE default_pgprot(__PAGE_KERNEL_IO_NOCACHE) - 
-#endif /* __ASSEMBLY__ */ - -/* xwr */ -#define __P000 PAGE_NONE -#define __P001 PAGE_READONLY -#define __P010 PAGE_COPY -#define __P011 PAGE_COPY -#define __P100 PAGE_READONLY_EXEC -#define __P101 PAGE_READONLY_EXEC -#define __P110 PAGE_COPY_EXEC -#define __P111 PAGE_COPY_EXEC - -#define __S000 PAGE_NONE -#define __S001 PAGE_READONLY -#define __S010 PAGE_SHARED -#define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY_EXEC -#define __S101 PAGE_READONLY_EXEC -#define __S110 PAGE_SHARED_EXEC -#define __S111 PAGE_SHARED_EXEC +#define __PP _PAGE_PRESENT +#define __RW _PAGE_RW +#define _USR _PAGE_USER +#define ___A _PAGE_ACCESSED +#define ___D _PAGE_DIRTY +#define ___G _PAGE_GLOBAL +#define __NX _PAGE_NX + +#define _ENC _PAGE_ENC +#define __WP _PAGE_CACHE_WP +#define __NC _PAGE_NOCACHE +#define _PSE _PAGE_PSE + +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) } ) +#define __pg(x) __pgprot(x) + +#define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G) +#define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0) +#define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0) +#define PAGE_COPY_NOEXEC __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_COPY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) +#define PAGE_COPY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_READONLY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_READONLY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) + +/* + * Page tables need to have Write=1 in order for any lower PTEs to be + * writable. This includes shadow stack memory (Write=0, Dirty=1). + */ +#define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0) +#define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC) +#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0) +#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC) + +#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX| 0| 0|___G) +#define __PAGE_KERNEL_ROX (__PP| 0| 0|___A| 0| 0| 0|___G) +#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G) +#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC) +#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX| 0| 0|___G) +#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G) +#define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G) +#define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP) + + +#define __PAGE_KERNEL_IO __PAGE_KERNEL +#define __PAGE_KERNEL_IO_NOCACHE __PAGE_KERNEL_NOCACHE + + +#ifndef __ASSEMBLER__ + +#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _ENC) +#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _ENC) +#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL | 0) +#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP | 0) + +#define __pgprot_mask(x) __pgprot((x) & __default_kernel_pte_mask) + +#define PAGE_KERNEL __pgprot_mask(__PAGE_KERNEL | _ENC) +#define PAGE_KERNEL_NOENC __pgprot_mask(__PAGE_KERNEL | 0) +#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC) +#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC) +#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0) +#define PAGE_KERNEL_ROX __pgprot_mask(__PAGE_KERNEL_ROX | _ENC) +#define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC) +#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC) +#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC) +#define PAGE_KERNEL_VVAR __pgprot_mask(__PAGE_KERNEL_VVAR | _ENC) + +#define PAGE_KERNEL_IO __pgprot_mask(__PAGE_KERNEL_IO)
+#define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE) + +#endif /* __ASSEMBLER__ */ /* * early identity mapping pte attrib macros. @@ -250,7 +277,7 @@ enum page_cache_mode { # include <asm/pgtable_64_types.h> #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -267,6 +294,12 @@ typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; typedef struct { pgdval_t pgd; } pgd_t; +static inline pgprot_t pgprot_nx(pgprot_t prot) +{ + return __pgprot(pgprot_val(prot) | _PAGE_NX); +} +#define pgprot_nx pgprot_nx + #ifdef CONFIG_X86_PAE /* @@ -358,11 +391,9 @@ static inline pudval_t native_pud_val(pud_t pud) #endif #if CONFIG_PGTABLE_LEVELS > 2 -typedef struct { pmdval_t pmd; } pmd_t; - static inline pmd_t native_make_pmd(pmdval_t val) { - return (pmd_t) { val }; + return (pmd_t) { .pmd = val }; } static inline pmdval_t native_pmd_val(pmd_t pmd) @@ -450,12 +481,6 @@ static inline pteval_t pte_flags(pte_t pte) return native_pte_val(pte) & PTE_FLAGS_MASK; } -#define pgprot_val(x) ((x).pgprot) -#define __pgprot(x) ((pgprot_t) { (x) } ) - -extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM]; -extern uint8_t __pte2cachemode_tbl[8]; - #define __pte2cm_idx(cb) \ ((((cb) >> (_PAGE_BIT_PAT - 2)) & 4) | \ (((cb) >> (_PAGE_BIT_PCD - 1)) & 2) | \ @@ -465,43 +490,26 @@ extern uint8_t __pte2cachemode_tbl[8]; (((i) & 2) << (_PAGE_BIT_PCD - 1)) | \ (((i) & 1) << _PAGE_BIT_PWT)) -static inline unsigned long cachemode2protval(enum page_cache_mode pcm) +unsigned long cachemode2protval(enum page_cache_mode pcm); + +static inline pgprotval_t protval_4k_2_large(pgprotval_t val) { - if (likely(pcm == 0)) - return 0; - return __cachemode2pte_tbl[pcm]; + return (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | + ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); } -static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm) +static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot) { - return __pgprot(cachemode2protval(pcm)); + return __pgprot(protval_4k_2_large(pgprot_val(pgprot))); } -static inline enum page_cache_mode pgprot2cachemode(pgprot_t pgprot) +static inline pgprotval_t protval_large_2_4k(pgprotval_t val) { - unsigned long masked; - - masked = pgprot_val(pgprot) & _PAGE_CACHE_MASK; - if (likely(masked == 0)) - return 0; - return __pte2cachemode_tbl[__pte2cm_idx(masked)]; -} -static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot) -{ - pgprotval_t val = pgprot_val(pgprot); - pgprot_t new; - - pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | - ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); - return new; + return (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | + ((val & _PAGE_PAT_LARGE) >> + (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); } static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot) { - pgprotval_t val = pgprot_val(pgprot); - pgprot_t new; - - pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | - ((val & _PAGE_PAT_LARGE) >> - (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); - return new; + return __pgprot(protval_large_2_4k(pgprot_val(pgprot))); } @@ -509,8 +517,6 @@ typedef struct page *pgtable_t; extern pteval_t __supported_pte_mask; extern pteval_t __default_kernel_pte_mask; -extern void set_nx(void); -extern int nx_enabled; #define pgprot_writecombine pgprot_writecombine extern pgprot_t pgprot_writecombine(pgprot_t prot); @@ -535,15 +541,13 @@ extern void native_pagetable_init(void); #define native_pagetable_init paging_init #endif -struct seq_file; -extern void arch_report_meminfo(struct seq_file *m); - enum pg_level { 
PG_LEVEL_NONE, PG_LEVEL_4K, PG_LEVEL_2M, PG_LEVEL_1G, PG_LEVEL_512G, + PG_LEVEL_256T, PG_LEVEL_NUM }; @@ -562,6 +566,8 @@ static inline void update_page_count(int level, unsigned long pages) { } extern pte_t *lookup_address(unsigned long address, unsigned int *level); extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, unsigned int *level); +pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address, + unsigned int *level, bool *nx, bool *rw); extern pmd_t *lookup_pmd_address(unsigned long address); extern phys_addr_t slow_virt_to_phys(void *__address); extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, @@ -570,6 +576,6 @@ extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long page_flags); extern int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address, unsigned long numpages); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PGTABLE_DEFS_H */ diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 19b137f1b3be..2e6c04d8a45b 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -2,16 +2,19 @@ #ifndef _ASM_X86_PKEYS_H #define _ASM_X86_PKEYS_H -#define ARCH_DEFAULT_PKEY 0 - -#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) +/* + * If more than 16 keys are ever supported, a thorough audit + * will be necessary to ensure that the types that store key + * numbers and masks have sufficient capacity. + */ +#define arch_max_pkey() (cpu_feature_enabled(X86_FEATURE_OSPKE) ? 16 : 1) extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val); static inline bool arch_pkeys_enabled(void) { - return boot_cpu_has(X86_FEATURE_OSPKE); + return cpu_feature_enabled(X86_FEATURE_OSPKE); } /* @@ -21,7 +24,7 @@ static inline bool arch_pkeys_enabled(void) extern int __execute_only_pkey(struct mm_struct *mm); static inline int execute_only_pkey(struct mm_struct *mm) { - if (!boot_cpu_has(X86_FEATURE_OSPKE)) + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return ARCH_DEFAULT_PKEY; return __execute_only_pkey(mm); @@ -32,15 +35,12 @@ extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma, static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey) { - if (!boot_cpu_has(X86_FEATURE_OSPKE)) + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return 0; return __arch_override_mprotect_pkey(vma, prot, pkey); } -extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, - unsigned long init_val); - #define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | VM_PKEY_BIT3) #define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map) @@ -115,12 +115,6 @@ int mm_pkey_free(struct mm_struct *mm, int pkey) return 0; } -extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, - unsigned long init_val); -extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, - unsigned long init_val); -extern void copy_init_pkru_to_fpregs(void); - static inline int vma_pkey(struct vm_area_struct *vma) { unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 | diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h new file mode 100644 index 000000000000..74f0a2d34ffd --- /dev/null +++ b/arch/x86/include/asm/pkru.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PKRU_H +#define _ASM_X86_PKRU_H + +#include <asm/cpufeature.h> + +#define PKRU_AD_BIT 0x1u +#define PKRU_WD_BIT 0x2u +#define 
PKRU_BITS_PER_PKEY 2 + +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +extern u32 init_pkru_value; +#define pkru_get_init_value() READ_ONCE(init_pkru_value) +#else +#define init_pkru_value 0 +#define pkru_get_init_value() 0 +#endif + +static inline bool __pkru_allows_read(u32 pkru, u16 pkey) +{ + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; + return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); +} + +static inline bool __pkru_allows_write(u32 pkru, u16 pkey) +{ + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; + /* + * Access-disable disables writes too so we need to check + * both bits here. + */ + return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits)); +} + +static inline u32 read_pkru(void) +{ + if (cpu_feature_enabled(X86_FEATURE_OSPKE)) + return rdpkru(); + return 0; +} + +static inline void write_pkru(u32 pkru) +{ + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + /* + * WRPKRU is relatively expensive compared to RDPKRU. + * Avoid WRPKRU when it would not change the value. + */ + if (pkru != rdpkru()) + wrpkru(pkru); +} + +static inline void pkru_write_default(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + + wrpkru(pkru_get_init_value()); +} + +#endif diff --git a/arch/x86/include/asm/platform_sst_audio.h b/arch/x86/include/asm/platform_sst_audio.h index 059823bb8af7..40f92270515b 100644 --- a/arch/x86/include/asm/platform_sst_audio.h +++ b/arch/x86/include/asm/platform_sst_audio.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * platform_sst_audio.h: sst audio platform data header file * @@ -5,17 +6,10 @@ * Author: Jeeja KP <jeeja.kp@intel.com> * Omair Mohammed Abdullah <omair.m.abdullah@intel.com> * Vinod Koul <vinod.koul@intel.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. */ #ifndef _PLATFORM_SST_AUDIO_H_ #define _PLATFORM_SST_AUDIO_H_ -#include <linux/sfi.h> - #define MAX_NUM_STREAMS_MRFLD 25 #define MAX_NUM_STREAMS MAX_NUM_STREAMS_MRFLD diff --git a/arch/x86/include/asm/posted_intr.h b/arch/x86/include/asm/posted_intr.h new file mode 100644 index 000000000000..a5f761fbf45b --- /dev/null +++ b/arch/x86/include/asm/posted_intr.h @@ -0,0 +1,187 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _X86_POSTED_INTR_H +#define _X86_POSTED_INTR_H + +#include <asm/cmpxchg.h> +#include <asm/rwonce.h> +#include <asm/irq_vectors.h> + +#include <linux/bitmap.h> + +#define POSTED_INTR_ON 0 +#define POSTED_INTR_SN 1 + +#define PID_TABLE_ENTRY_VALID 1 + +#define NR_PIR_VECTORS 256 +#define NR_PIR_WORDS (NR_PIR_VECTORS / BITS_PER_LONG) + +/* Posted-Interrupt Descriptor */ +struct pi_desc { + unsigned long pir[NR_PIR_WORDS]; /* Posted interrupt requested */ + union { + struct { + u16 notifications; /* Suppress and outstanding bits */ + u8 nv; + u8 rsvd_2; + u32 ndst; + }; + u64 control; + }; + u32 rsvd[6]; +} __aligned(64); + +/* + * De-multiplexing posted interrupts is on the performance path; the code + * below is written to optimize the cache performance based on the following + * considerations: + * 1. Posted interrupt descriptor (PID) fits in a cache line that is frequently + * accessed by both CPU and IOMMU. + * 2. During software processing of posted interrupts, the CPU needs to do + * natural width read and xchg for checking and clearing posted interrupt + * request (PIR), a 256-bit field within the PID.
+ * 3. On the other side, the IOMMU does atomic swaps of the entire PID cache + * line when posting interrupts and setting control bits. + * 4. The CPU can access the cache line an order of magnitude faster than the + * IOMMU. + * 5. Each time the IOMMU posts an interrupt to the PIR, it will evict the PID + * cache line. The cache line states after each operation are as follows, + * assuming a 64-bit kernel:
+ *     CPU          IOMMU             PID Cache line state
+ *     ---------------------------------------------------------------
+ *     read64                         exclusive
+ *     lock xchg64                    modified
+ *                  post/atomic swap  invalid
+ *     ---------------------------------------------------------------
+ * + * To reduce L1 data cache misses, it is important to avoid contention with + * IOMMU's interrupt posting/atomic swap. Therefore, a copy of PIR is used + * when processing posted interrupts in software, e.g. to dispatch interrupt + * handlers for posted MSIs, or to move interrupts from the PIR to the vIRR + * in KVM. + * + * In addition, the code is trying to keep the cache line state consistent + * as much as possible, e.g. when making a copy and clearing the PIR + * (assuming non-zero PIR bits are present in the entire PIR), it does: + * read, read, read, read, xchg, xchg, xchg, xchg + * instead of: + * read, xchg, read, xchg, read, xchg, read, xchg + */ +static __always_inline bool pi_harvest_pir(unsigned long *pir, + unsigned long *pir_vals) +{ + unsigned long pending = 0; + int i; + + for (i = 0; i < NR_PIR_WORDS; i++) { + pir_vals[i] = READ_ONCE(pir[i]); + pending |= pir_vals[i]; + } + + if (!pending) + return false; + + for (i = 0; i < NR_PIR_WORDS; i++) { + if (!pir_vals[i]) + continue; + + pir_vals[i] = arch_xchg(&pir[i], 0); + } + + return true; +} + +static inline bool pi_test_and_set_on(struct pi_desc *pi_desc) +{ + return test_and_set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc) +{ + return test_and_clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_and_clear_sn(struct pi_desc *pi_desc) +{ + return test_and_clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) +{ + return test_and_set_bit(vector, pi_desc->pir); +} + +static inline bool pi_is_pir_empty(struct pi_desc *pi_desc) +{ + return bitmap_empty(pi_desc->pir, NR_VECTORS); +} + +static inline void pi_set_sn(struct pi_desc *pi_desc) +{ + set_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +static inline void pi_set_on(struct pi_desc *pi_desc) +{ + set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline void pi_clear_on(struct pi_desc *pi_desc) +{ + clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline void pi_clear_sn(struct pi_desc *pi_desc) +{ + clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_on(struct pi_desc *pi_desc) +{ + return test_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_sn(struct pi_desc *pi_desc) +{ + return test_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_pir(int vector, struct pi_desc *pi_desc) +{ + return test_bit(vector, (unsigned long *)pi_desc->pir); +} + +/* Non-atomic helpers */ +static inline void __pi_set_sn(struct pi_desc *pi_desc) +{ + pi_desc->notifications |= BIT(POSTED_INTR_SN); +} + +static inline void __pi_clear_sn(struct pi_desc
*pi_desc) +{ + pi_desc->notifications &= ~BIT(POSTED_INTR_SN); +} + +#ifdef CONFIG_X86_POSTED_MSI +/* + * Not all external vectors are subject to interrupt remapping, e.g. IOMMU's + * own interrupts. Here we do not distinguish them since those vector bits in + * PIR will always be zero. + */ +static inline bool pi_pending_this_cpu(unsigned int vector) +{ + struct pi_desc *pid = this_cpu_ptr(&posted_msi_pi_desc); + + if (WARN_ON_ONCE(vector > NR_VECTORS || vector < FIRST_EXTERNAL_VECTOR)) + return false; + + return test_bit(vector, pid->pir); +} + +extern void intel_posted_msi_init(void); +#else +static inline bool pi_pending_this_cpu(unsigned int vector) { return false; } + +static inline void intel_posted_msi_init(void) { } +#endif /* CONFIG_X86_POSTED_MSI */ + +#endif /* _X86_POSTED_INTR_H */ diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 99a7fa9ab0a3..578441db09f0 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -4,9 +4,10 @@ #include <asm/rmwcc.h> #include <asm/percpu.h> -#include <linux/thread_info.h> -DECLARE_PER_CPU(int, __preempt_count); +#include <linux/static_call_types.h> + +DECLARE_PER_CPU_CACHE_HOT(int, __preempt_count); /* We use the MSB mostly because it's available */ #define PREEMPT_NEED_RESCHED 0x80000000 @@ -30,11 +31,11 @@ static __always_inline void preempt_count_set(int pc) { int old, new; + old = raw_cpu_read_4(__preempt_count); do { - old = raw_cpu_read_4(__preempt_count); new = (old & PREEMPT_NEED_RESCHED) | (pc & ~PREEMPT_NEED_RESCHED); - } while (raw_cpu_cmpxchg_4(__preempt_count, old, new) != old); + } while (!raw_cpu_try_cmpxchg_4(__preempt_count, &old, new)); } /* @@ -43,7 +44,7 @@ static __always_inline void preempt_count_set(int pc) #define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ - per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \ + per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \ } while (0) /* @@ -91,7 +92,8 @@ static __always_inline void __preempt_count_sub(int val) */ static __always_inline bool __preempt_count_dec_and_test(void) { - return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var])); + return GEN_UNARY_RMWcc("decl", __my_cpu_var(__preempt_count), e, + __percpu_arg([var])); } /* @@ -102,17 +104,48 @@ static __always_inline bool should_resched(int preempt_offset) return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); } -#ifdef CONFIG_PREEMPT - extern asmlinkage void ___preempt_schedule(void); -# define __preempt_schedule() \ - asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT) +#ifdef CONFIG_PREEMPTION + +extern asmlinkage void preempt_schedule(void); +extern asmlinkage void preempt_schedule_thunk(void); + +#define preempt_schedule_dynamic_enabled preempt_schedule_thunk +#define preempt_schedule_dynamic_disabled NULL + +extern asmlinkage void preempt_schedule_notrace(void); +extern asmlinkage void preempt_schedule_notrace_thunk(void); + +#define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace_thunk +#define preempt_schedule_notrace_dynamic_disabled NULL + +#ifdef CONFIG_PREEMPT_DYNAMIC + +DECLARE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled); + +#define __preempt_schedule() \ +do { \ + __STATIC_CALL_MOD_ADDRESSABLE(preempt_schedule); \ + asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule) : ASM_CALL_CONSTRAINT); \ +} while (0) + +DECLARE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled); + +#define
__preempt_schedule_notrace() \ +do { \ + __STATIC_CALL_MOD_ADDRESSABLE(preempt_schedule_notrace); \ + asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule_notrace) : ASM_CALL_CONSTRAINT); \ +} while (0) + +#else /* PREEMPT_DYNAMIC */ + +#define __preempt_schedule() \ + asm volatile ("call preempt_schedule_thunk" : ASM_CALL_CONSTRAINT); + +#define __preempt_schedule_notrace() \ + asm volatile ("call preempt_schedule_notrace_thunk" : ASM_CALL_CONSTRAINT); - extern asmlinkage void preempt_schedule(void); - extern asmlinkage void ___preempt_schedule_notrace(void); -# define __preempt_schedule_notrace() \ - asm volatile ("call ___preempt_schedule_notrace" : ASM_CALL_CONSTRAINT) +#endif /* PREEMPT_DYNAMIC */ - extern asmlinkage void preempt_schedule_notrace(void); -#endif +#endif /* PREEMPTION */ #endif /* __ASM_PREEMPT_H */ diff --git a/arch/x86/include/asm/processor-cyrix.h b/arch/x86/include/asm/processor-cyrix.h index aaedd73ea2c6..efe3e46e454b 100644 --- a/arch/x86/include/asm/processor-cyrix.h +++ b/arch/x86/include/asm/processor-cyrix.h @@ -3,37 +3,16 @@ * NSC/Cyrix CPU indexed register access. Must be inlined instead of * macros to ensure correct access ordering * Access order is always 0x22 (=offset), 0x23 (=value) - * - * When using the old macros a line like - * setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); - * gets expanded to: - * do { - * outb((CX86_CCR2), 0x22); - * outb((({ - * outb((CX86_CCR2), 0x22); - * inb(0x23); - * }) | 0x88), 0x23); - * } while (0); - * - * which in fact violates the access order (= 0x22, 0x22, 0x23, 0x23). */ +#include <asm/pc-conf-reg.h> + static inline u8 getCx86(u8 reg) { - outb(reg, 0x22); - return inb(0x23); + return pc_conf_get(reg); } static inline void setCx86(u8 reg, u8 data) { - outb(reg, 0x22); - outb(data, 0x23); + pc_conf_set(reg, data); } - -#define getCx86_old(reg) ({ outb((reg), 0x22); inb(0x23); }) - -#define setCx86_old(reg, data) do { \ - outb((reg), 0x22); \ - outb((data), 0x23); \ -} while (0) - diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index 02c2cbda4a74..e5f204b9b33d 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h @@ -28,6 +28,8 @@ * On systems with SME, one bit (in a variable position!) is stolen to indicate * that the top-level paging structure is encrypted. * + * On systems with LAM, bits 61 and 62 are used to indicate LAM mode. + * * All of the remaining bits indicate the physical address of the top-level * paging structure. * @@ -35,7 +37,7 @@ #ifdef CONFIG_X86_64 /* Mask off the address space ID and SME encryption bits.
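 * For example, the kernel's read_cr3_pa() helper recovers the physical
 * address of the top-level table as __read_cr3() & CR3_ADDR_MASK.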
*/ -#define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull) +#define CR3_ADDR_MASK __sme_clr(PHYSICAL_PAGE_MASK) #define CR3_PCID_MASK 0xFFFull #define CR3_NOFLUSH BIT_ULL(63) @@ -49,7 +51,7 @@ #define CR3_NOFLUSH 0 #endif -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION # define X86_CR3_PTI_PCID_USER_BIT 11 #endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 33051436c864..a24c7805acdb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -7,6 +7,7 @@ /* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; +struct io_bitmap; struct vm86; #include <asm/math_emu.h> @@ -15,15 +16,18 @@ struct vm86; #include <uapi/asm/sigcontext.h> #include <asm/current.h> #include <asm/cpufeatures.h> +#include <asm/cpuid/api.h> #include <asm/page.h> #include <asm/pgtable_types.h> #include <asm/percpu.h> -#include <asm/msr.h> #include <asm/desc_defs.h> #include <asm/nops.h> #include <asm/special_insns.h> #include <asm/fpu/types.h> #include <asm/unwind_hints.h> +#include <asm/vmxfeatures.h> +#include <asm/vdso/processor.h> +#include <asm/shstk.h> #include <linux/personality.h> #include <linux/cache.h> @@ -56,86 +60,135 @@ struct vm86; # define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif -enum tlb_infos { - ENTRIES, - NR_INFO -}; - -extern u16 __read_mostly tlb_lli_4k[NR_INFO]; -extern u16 __read_mostly tlb_lli_2m[NR_INFO]; -extern u16 __read_mostly tlb_lli_4m[NR_INFO]; -extern u16 __read_mostly tlb_lld_4k[NR_INFO]; -extern u16 __read_mostly tlb_lld_2m[NR_INFO]; -extern u16 __read_mostly tlb_lld_4m[NR_INFO]; -extern u16 __read_mostly tlb_lld_1g[NR_INFO]; +extern u16 __read_mostly tlb_lli_4k; +extern u16 __read_mostly tlb_lli_2m; +extern u16 __read_mostly tlb_lli_4m; +extern u16 __read_mostly tlb_lld_4k; +extern u16 __read_mostly tlb_lld_2m; +extern u16 __read_mostly tlb_lld_4m; +extern u16 __read_mostly tlb_lld_1g; /* - * CPU type and hardware bug flags. Kept separately for each CPU. - * Members of this structure are referenced in head_32.S, so think twice - * before touching them. [mj] + * CPU type and hardware bug flags. Kept separately for each CPU. */ +struct cpuinfo_topology { + // Real APIC ID read from the local APIC + u32 apicid; + // The initial APIC ID provided by CPUID + u32 initial_apicid; + + // Physical package ID + u32 pkg_id; + + // Physical die ID on AMD, Relative on Intel + u32 die_id; + + // Compute unit ID - AMD specific + u32 cu_id; + + // Core ID relative to the package + u32 core_id; + + // Logical ID mappings + u32 logical_pkg_id; + u32 logical_die_id; + u32 logical_core_id; + + // AMD Node ID and Nodes per Package info + u32 amd_node_id; + + // Cache level topology IDs + u32 llc_id; + u32 l2c_id; + + // Hardware defined CPU-type + union { + u32 cpu_type; + struct { + // CPUID.1A.EAX[23-0] + u32 intel_native_model_id :24; + // CPUID.1A.EAX[31-24] + u32 intel_type :8; + }; + struct { + // CPUID 0x80000026.EBX + u32 amd_num_processors :16, + amd_power_eff_ranking :8, + amd_native_model_id :4, + amd_type :4; + }; + }; +}; + struct cpuinfo_x86 { - __u8 x86; /* CPU family */ - __u8 x86_vendor; /* CPU vendor */ - __u8 x86_model; + union { + /* + * The particular ordering (low-to-high) of (vendor, + * family, model) is chosen so that a range of models, + * as is usually needed on AMD, can be compared numerically.
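+ * With the combined x86_vfm view, such a range check collapses
+ * to a single integer comparison, roughly (a sketch; the
+ * vendor/family/model packing helper is assumed to follow the
+ * byte order above):
+ *   c->x86_vfm >= VFM_MAKE(X86_VENDOR_AMD, 0x17, 0x30)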
+ */ + struct { + __u8 x86_model; + /* CPU family */ + __u8 x86; + /* CPU vendor */ + __u8 x86_vendor; + __u8 x86_reserved; + }; + /* combined vendor, family, model */ + __u32 x86_vfm; + }; __u8 x86_stepping; #ifdef CONFIG_X86_64 /* Number of 4K pages in DTLB/ITLB combined (in pages): */ int x86_tlbsize; #endif +#ifdef CONFIG_X86_VMX_FEATURE_NAMES + __u32 vmx_capability[NVMXINTS]; +#endif __u8 x86_virt_bits; __u8 x86_phys_bits; - /* CPUID returned core id bits: */ - __u8 x86_coreid_bits; - __u8 cu_id; /* Max extended CPUID function supported: */ __u32 extended_cpuid_level; /* Maximum supported CPUID level, -1=no CPUID: */ int cpuid_level; - __u32 x86_capability[NCAPINTS + NBUGINTS]; + /* + * Align to size of unsigned long because the x86_capability array + * is passed to bitops which require the alignment. Use unnamed + * union to enforce the array is aligned to size of unsigned long. + */ + union { + __u32 x86_capability[NCAPINTS + NBUGINTS]; + unsigned long x86_capability_alignment; + }; char x86_vendor_id[16]; char x86_model_id[64]; + struct cpuinfo_topology topo; /* in KB - valid for CPUs which support this call: */ unsigned int x86_cache_size; int x86_cache_alignment; /* In bytes */ - /* Cache QoS architectural values: */ + /* Cache QoS architectural values, valid only on the BSP: */ int x86_cache_max_rmid; /* max index */ int x86_cache_occ_scale; /* scale to bytes */ + int x86_cache_mbm_width_offset; int x86_power; unsigned long loops_per_jiffy; - /* cpuid returned max cores value: */ - u16 x86_max_cores; - u16 apicid; - u16 initial_apicid; + /* protected processor identification number */ + u64 ppin; u16 x86_clflush_size; /* number of cores as seen by the OS: */ u16 booted_cores; - /* Physical processor id: */ - u16 phys_proc_id; - /* Logical processor id: */ - u16 logical_proc_id; - /* Core id: */ - u16 cpu_core_id; /* Index into per_cpu list: */ u16 cpu_index; + /* Is SMT active on this core?
*/ + bool smt_active; u32 microcode; /* Address space bits used by the cache internally */ u8 x86_cache_bits; unsigned initialized : 1; } __randomize_layout; -struct cpuid_regs { - u32 eax, ebx, ecx, edx; -}; - -enum cpuid_regs_idx { - CPUID_EAX = 0, - CPUID_EBX, - CPUID_ECX, - CPUID_EDX, -}; - #define X86_VENDOR_INTEL 0 #define X86_VENDOR_CYRIX 1 #define X86_VENDOR_AMD 2 @@ -144,7 +197,9 @@ enum cpuid_regs_idx { #define X86_VENDOR_TRANSMETA 7 #define X86_VENDOR_NSC 8 #define X86_VENDOR_HYGON 9 -#define X86_VENDOR_NUM 10 +#define X86_VENDOR_ZHAOXIN 10 +#define X86_VENDOR_VORTEX 11 +#define X86_VENDOR_NUM 12 #define X86_VENDOR_UNKNOWN 0xff @@ -154,17 +209,11 @@ enum cpuid_regs_idx { extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; -extern struct x86_hw_tss doublefault_tss; extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; -#ifdef CONFIG_SMP DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); #define cpu_data(cpu) per_cpu(cpu_info, cpu) -#else -#define cpu_info boot_cpu_data -#define cpu_data(cpu) boot_cpu_data -#endif extern const struct seq_operations cpuinfo_op; @@ -177,51 +226,13 @@ static inline unsigned long long l1tf_pfn_limit(void) return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT); } +void init_cpu_devs(void); +void get_cpu_vendor(struct cpuinfo_x86 *c); extern void early_cpu_init(void); -extern void identify_boot_cpu(void); -extern void identify_secondary_cpu(struct cpuinfo_x86 *); +extern void identify_secondary_cpu(unsigned int cpu); extern void print_cpu_info(struct cpuinfo_x86 *); void print_cpu_msr(struct cpuinfo_x86 *); -#ifdef CONFIG_X86_32 -extern int have_cpuid_p(void); -#else -static inline int have_cpuid_p(void) -{ - return 1; -} -#endif -static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - /* ecx is often an input as well as an output. */ - asm volatile("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx) - : "memory"); -} - -#define native_cpuid_reg(reg) \ -static inline unsigned int native_cpuid_##reg(unsigned int op) \ -{ \ - unsigned int eax = op, ebx, ecx = 0, edx; \ - \ - native_cpuid(&eax, &ebx, &ecx, &edx); \ - \ - return reg; \ -} - -/* - * Native CPUID functions returning a single datum. - */ -native_cpuid_reg(eax) -native_cpuid_reg(ebx) -native_cpuid_reg(ecx) -native_cpuid_reg(edx) - /* * Friendlier CR3 helpers. */ @@ -297,11 +308,6 @@ struct x86_hw_tss { struct x86_hw_tss { u32 reserved1; u64 sp0; - - /* - * We store cpu_current_top_of_stack in sp1 so it's always accessible. - * Linux does not use ring 1, so sp1 is not otherwise needed. 
- */ u64 sp1; /* @@ -325,26 +331,56 @@ struct x86_hw_tss { * IO-bitmap sizes: */ #define IO_BITMAP_BITS 65536 -#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) -#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) -#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss)) -#define INVALID_IO_BITMAP_OFFSET 0x8000 +#define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE) +#define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long)) + +#define IO_BITMAP_OFFSET_VALID_MAP \ + (offsetof(struct tss_struct, io_bitmap.bitmap) - \ + offsetof(struct tss_struct, x86_tss)) + +#define IO_BITMAP_OFFSET_VALID_ALL \ + (offsetof(struct tss_struct, io_bitmap.mapall) - \ + offsetof(struct tss_struct, x86_tss)) + +#ifdef CONFIG_X86_IOPL_IOPERM +/* + * sizeof(unsigned long) coming from an extra "long" at the end of the + * iobitmap. The limit is inclusive, i.e. the last valid byte. + */ +# define __KERNEL_TSS_LIMIT \ + (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \ + sizeof(unsigned long) - 1) +#else +# define __KERNEL_TSS_LIMIT \ + (offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1) +#endif + +/* Base offset outside of TSS_LIMIT so unprivileged IO causes #GP */ +#define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1) struct entry_stack { - unsigned long words[64]; + char stack[PAGE_SIZE]; }; struct entry_stack_page { struct entry_stack stack; } __aligned(PAGE_SIZE); -struct tss_struct { +/* + * All IO bitmap related data stored in the TSS: + */ +struct x86_io_bitmap { + /* The sequence number of the last active bitmap. */ + u64 prev_sequence; + /* - * The fixed hardware portion. This must not cross a page boundary - * at risk of violating the SDM's advice and potentially triggering - * errata. + * Store the dirty size of the last io bitmap offender. The next + * one will have to do the cleanup as the switch out to a non io + * bitmap user will just set x86_tss.io_bitmap_base to a value + * outside of the TSS limit. So for sane tasks there is no need to + * actually touch the io_bitmap at all. */ - struct x86_hw_tss x86_tss; + unsigned int prev_max; /* * The extra 1 is there because the CPU will access an @@ -352,101 +388,63 @@ struct tss_struct { * bitmap. The extra byte must be all 1 bits, and must * be within the limit. */ - unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; + unsigned long bitmap[IO_BITMAP_LONGS + 1]; + + /* + * Special I/O bitmap to emulate IOPL(3). All bytes zero, + * except the additional byte at the end. + */ + unsigned long mapall[IO_BITMAP_LONGS + 1]; +}; + +struct tss_struct { + /* + * The fixed hardware portion. This must not cross a page boundary + * at risk of violating the SDM's advice and potentially triggering + * errata. + */ + struct x86_hw_tss x86_tss; + + struct x86_io_bitmap io_bitmap; } __aligned(PAGE_SIZE); DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); -/* - * sizeof(unsigned long) coming from an extra "long" at the end - * of the iobitmap. - * - * -1?
seg base+limit should be pointing to the address of the - * last valid byte - */ -#define __KERNEL_TSS_LIMIT \ - (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1) +/* Per CPU interrupt stacks */ +struct irq_stack { + char stack[IRQ_STACK_SIZE]; +} __aligned(IRQ_STACK_SIZE); -#ifdef CONFIG_X86_32 -DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); +DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr); +#ifdef CONFIG_X86_64 +DECLARE_PER_CPU_CACHE_HOT(bool, hardirq_stack_inuse); #else -/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */ -#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1 +DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, softirq_stack_ptr); #endif -/* - * Save the original ist values for checking stack pointers during debugging - */ -struct orig_ist { - unsigned long ist[7]; -}; +DECLARE_PER_CPU_CACHE_HOT(unsigned long, cpu_current_top_of_stack); +/* const-qualified alias provided by the linker. */ +DECLARE_PER_CPU_CACHE_HOT(const unsigned long __percpu_seg_override, + const_cpu_current_top_of_stack); #ifdef CONFIG_X86_64 -DECLARE_PER_CPU(struct orig_ist, orig_ist); - -union irq_stack_union { - char irq_stack[IRQ_STACK_SIZE]; - /* - * GCC hardcodes the stack canary as %gs:40. Since the - * irq_stack is the object at %gs:0, we reserve the bottom - * 48 bytes of the irq stack for the canary. - */ - struct { - char gs_base[40]; - unsigned long stack_canary; - }; -}; - -DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible; -DECLARE_INIT_PER_CPU(irq_stack_union); - static inline unsigned long cpu_kernelmode_gs_base(int cpu) { - return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu); +#ifdef CONFIG_SMP + return per_cpu_offset(cpu); +#else + return 0; +#endif } -DECLARE_PER_CPU(char *, irq_stack_ptr); -DECLARE_PER_CPU(unsigned int, irq_count); -extern asmlinkage void ignore_sysret(void); +extern asmlinkage void entry_SYSCALL32_ignore(void); -#if IS_ENABLED(CONFIG_KVM) /* Save actual FS/GS selectors and bases to current->thread */ -void save_fsgs_for_kvm(void); -#endif -#else /* X86_64 */ -#ifdef CONFIG_STACKPROTECTOR -/* - * Make sure stack canary segment base is cached-aligned: - * "For Intel Atom processors, avoid non zero segment base address - * that is not aligned to cache line boundary at all cost." - * (Optim Ref Manual Assembly/Compiler Coding Rule 15.) - */ -struct stack_canary { - char __pad[20]; /* canary at %gs:20 */ - unsigned long canary; -}; -DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); -#endif -/* - * per-CPU IRQ handling stacks - */ -struct irq_stack { - u32 stack[THREAD_SIZE/sizeof(u32)]; -} __aligned(THREAD_SIZE); - -DECLARE_PER_CPU(struct irq_stack *, hardirq_stack); -DECLARE_PER_CPU(struct irq_stack *, softirq_stack); +void current_save_fsgs(void); #endif /* X86_64 */ -extern unsigned int fpu_kernel_xstate_size; -extern unsigned int fpu_user_xstate_size; - struct perf_event; -typedef struct { - unsigned long seg; -} mm_segment_t; - struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; @@ -478,7 +476,7 @@ struct thread_struct { /* Save middle states of ptrace breakpoints */ struct perf_event *ptrace_bps[HBP_NUM]; /* Debug status used for traps, single steps, etc... 
*/ - unsigned long debugreg6; + unsigned long virtual_dr6; /* Keep track of the exact dr7 value set by the user */ unsigned long ptrace_dr7; /* Fault info: */ @@ -490,58 +488,46 @@ struct thread_struct { struct vm86 *vm86; #endif /* IO permissions: */ - unsigned long *io_bitmap_ptr; - unsigned long iopl; - /* Max allowed port in the bitmap, in bytes: */ - unsigned io_bitmap_max; + struct io_bitmap *io_bitmap; - mm_segment_t addr_limit; + /* + * IOPL. Privilege level dependent I/O permission which is + * emulated via the I/O bitmap to prevent user space from disabling + * interrupts. + */ + unsigned long iopl_emul; - unsigned int sig_on_uaccess_err:1; - unsigned int uaccess_err:1; /* uaccess failed */ + unsigned int iopl_warn:1; - /* Floating point and extended processor state */ - struct fpu fpu; /* - * WARNING: 'fpu' is dynamically-sized. It *MUST* be at - * the end. + * Protection Keys Register for Userspace. Loaded immediately on + * context switch. Store it in thread_struct to avoid a lookup in + * the task's FPU xstate buffer. This value is only valid when a + * task is scheduled out. For 'current' the authoritative source of + * PKRU is the hardware itself. */ + u32 pkru; + +#ifdef CONFIG_X86_USER_SHADOW_STACK + unsigned long features; + unsigned long features_locked; + + struct thread_shstk shstk; +#endif }; -/* Whitelist the FPU state from the task_struct for hardened usercopy. */ -static inline void arch_thread_struct_whitelist(unsigned long *offset, - unsigned long *size) -{ - *offset = offsetof(struct thread_struct, fpu.state); - *size = fpu_kernel_xstate_size; -} +#ifdef CONFIG_X86_DEBUG_FPU +extern struct fpu *x86_task_fpu(struct task_struct *task); +#else +# define x86_task_fpu(task) ((struct fpu *)((void *)(task) + sizeof(*(task)))) +#endif -/* - * Thread-synchronous status. - * - * This is different from the flags in that nobody else - * ever touches our thread-synchronous status, so we don't - * have to worry about atomic accesses. - */ -#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ +extern void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size); -/* - * Set IOPL bits in EFLAGS from given mask - */ -static inline void native_set_iopl_mask(unsigned mask) +static inline void arch_thread_struct_whitelist(unsigned long *offset, + unsigned long *size) { -#ifdef CONFIG_X86_32 - unsigned int reg; - - asm volatile ("pushfl;" - "popl %0;" - "andl %1, %0;" - "orl %2, %0;" - "pushl %0;" - "popfl" - : "=&r" (reg) - : "i" (~X86_EFLAGS_IOPL), "r" (mask)); -#endif + fpu_thread_struct_whitelist(offset, size); } static inline void @@ -550,24 +536,27 @@ native_load_sp0(unsigned long sp0) this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } -static inline void native_swapgs(void) +static __always_inline void native_swapgs(void) { #ifdef CONFIG_X86_64 asm volatile("swapgs" ::: "memory"); #endif } -static inline unsigned long current_top_of_stack(void) +static __always_inline unsigned long current_top_of_stack(void) { /* * We can't read directly from tss.sp0: sp0 on x86_32 is special in * and around vm86 mode and sp0 on x86_64 is special because of the * entry trampoline.
*/ + if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT)) + return this_cpu_read_const(const_cpu_current_top_of_stack); + return this_cpu_read_stable(cpu_current_top_of_stack); } -static inline bool on_thread_stack(void) +static __always_inline bool on_thread_stack(void) { return (unsigned long)(current_top_of_stack() - current_stack_pointer) < THREAD_SIZE; @@ -576,162 +565,17 @@ static inline bool on_thread_stack(void) #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else -#define __cpuid native_cpuid static inline void load_sp0(unsigned long sp0) { native_load_sp0(sp0); } -#define set_iopl_mask native_set_iopl_mask #endif /* CONFIG_PARAVIRT_XXL */ -/* Free all resources held by a thread. */ -extern void release_thread(struct task_struct *); - -unsigned long get_wchan(struct task_struct *p); - -/* - * Generic CPUID function - * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx - * resulting in stale register contents being returned. - */ -static inline void cpuid(unsigned int op, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - *eax = op; - *ecx = 0; - __cpuid(eax, ebx, ecx, edx); -} - -/* Some CPUID calls want 'count' to be placed in ecx */ -static inline void cpuid_count(unsigned int op, int count, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - *eax = op; - *ecx = count; - __cpuid(eax, ebx, ecx, edx); -} - -/* - * CPUID functions returning a single datum - */ -static inline unsigned int cpuid_eax(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - - return eax; -} - -static inline unsigned int cpuid_ebx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - - return ebx; -} - -static inline unsigned int cpuid_ecx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - - return ecx; -} - -static inline unsigned int cpuid_edx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - - return edx; -} - -/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ -static __always_inline void rep_nop(void) -{ - asm volatile("rep; nop" ::: "memory"); -} - -static __always_inline void cpu_relax(void) -{ - rep_nop(); -} - -/* - * This function forces the icache and prefetched instruction stream to - * catch up with reality in two very specific cases: - * - * a) Text was modified using one virtual address and is about to be executed - * from the same physical page at a different virtual address. - * - * b) Text was modified on a different CPU, may subsequently be - * executed on this CPU, and you want to make sure the new version - * gets executed. This generally means you're calling this in a IPI. - * - * If you're calling this for a different reason, you're probably doing - * it wrong. - */ -static inline void sync_core(void) -{ - /* - * There are quite a few ways to do this. IRET-to-self is nice - * because it works on every CPU, at any CPL (so it's compatible - * with paravirtualization), and it never exits to a hypervisor. - * The only down sides are that it's a bit slow (it seems to be - * a bit more than 2x slower than the fastest options) and that - * it unmasks NMIs. The "push %cs" is needed because, in - * paravirtual environments, __KERNEL_CS may not be a valid CS - * value when we do IRET directly. 
- * - * In case NMI unmasking or performance ever becomes a problem, - * the next best option appears to be MOV-to-CR2 and an - * unconditional jump. That sequence also works on all CPUs, - * but it will fault at CPL3 (i.e. Xen PV). - * - * CPUID is the conventional way, but it's nasty: it doesn't - * exist on some 486-like CPUs, and it usually exits to a - * hypervisor. - * - * Like all of Linux's memory ordering operations, this is a - * compiler barrier as well. - */ -#ifdef CONFIG_X86_32 - asm volatile ( - "pushfl\n\t" - "pushl %%cs\n\t" - "pushl $1f\n\t" - "iret\n\t" - "1:" - : ASM_CALL_CONSTRAINT : : "memory"); -#else - unsigned int tmp; - - asm volatile ( - UNWIND_HINT_SAVE - "mov %%ss, %0\n\t" - "pushq %q0\n\t" - "pushq %%rsp\n\t" - "addq $8, (%%rsp)\n\t" - "pushfq\n\t" - "mov %%cs, %0\n\t" - "pushq %q0\n\t" - "pushq $1f\n\t" - "iretq\n\t" - UNWIND_HINT_RESTORE - "1:" - : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory"); -#endif -} +unsigned long __get_wchan(struct task_struct *p); -extern void select_idle_routine(const struct cpuinfo_x86 *c); +extern void select_idle_routine(void); extern void amd_e400_c1e_apic_setup(void); extern unsigned long boot_option_idle_override; @@ -740,40 +584,18 @@ enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, IDLE_POLL}; extern void enable_sep_cpu(void); -extern int sysenter_setup(void); -void early_trap_pf_init(void); /* Defined in head.S */ extern struct desc_ptr early_gdt_descr; -extern void switch_to_new_gdt(int); +extern void switch_gdt_and_percpu_base(int); extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); -extern void load_percpu_segment(int); extern void cpu_init(void); - -static inline unsigned long get_debugctlmsr(void) -{ - unsigned long debugctlmsr = 0; - -#ifndef CONFIG_X86_DEBUGCTLMSR - if (boot_cpu_data.x86 < 6) - return 0; -#endif - rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); - - return debugctlmsr; -} - -static inline void update_debugctlmsr(unsigned long debugctlmsr) -{ -#ifndef CONFIG_X86_DEBUGCTLMSR - if (boot_cpu_data.x86 < 6) - return; -#endif - wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); -} +extern void cpu_init_exception_handling(bool boot_cpu); +extern void cpu_init_replace_early_idt(void); +extern void cr4_init(void); extern void set_task_blockstep(struct task_struct *task, bool on); @@ -785,13 +607,12 @@ extern char ignore_fpu_irq; #define HAVE_ARCH_PICK_MMAP_LAYOUT 1 #define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH #ifdef CONFIG_X86_32 # define BASE_PREFETCH "" # define ARCH_HAS_PREFETCH #else -# define BASE_PREFETCH "prefetcht0 %P1" +# define BASE_PREFETCH "prefetcht0 %1" #endif /* @@ -802,7 +623,7 @@ extern char ignore_fpu_irq; */ static inline void prefetch(const void *x) { - alternative_input(BASE_PREFETCH, "prefetchnta %P1", + alternative_input(BASE_PREFETCH, "prefetchnta %1", X86_FEATURE_XMM, "m" (*(const char *)x)); } @@ -812,18 +633,13 @@ static inline void prefetch(const void *x) * Useful for spinlocks to avoid one state transition in the * cache coherency protocol: */ -static inline void prefetchw(const void *x) +static __always_inline void prefetchw(const void *x) { - alternative_input(BASE_PREFETCH, "prefetchw %P1", + alternative_input(BASE_PREFETCH, "prefetchw %1", X86_FEATURE_3DNOWPREFETCH, "m" (*(const char *)x)); } -static inline void spin_lock_prefetch(const void *x) -{ - prefetchw(x); -} - #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ TOP_OF_KERNEL_STACK_PADDING) @@ -837,71 +653,18 @@ static inline void 
spin_lock_prefetch(const void *x) }) #ifdef CONFIG_X86_32 -/* - * User space process size: 3GB (default). - */ -#define IA32_PAGE_OFFSET PAGE_OFFSET -#define TASK_SIZE PAGE_OFFSET -#define TASK_SIZE_LOW TASK_SIZE -#define TASK_SIZE_MAX TASK_SIZE -#define DEFAULT_MAP_WINDOW TASK_SIZE -#define STACK_TOP TASK_SIZE -#define STACK_TOP_MAX STACK_TOP - #define INIT_THREAD { \ .sp0 = TOP_OF_INIT_STACK, \ .sysenter_cs = __KERNEL_CS, \ - .io_bitmap_ptr = NULL, \ - .addr_limit = KERNEL_DS, \ } -#define KSTK_ESP(task) (task_pt_regs(task)->sp) - #else -/* - * User space process size. This is the first address outside the user range. - * There are a few constraints that determine this: - * - * On Intel CPUs, if a SYSCALL instruction is at the highest canonical - * address, then that syscall will enter the kernel with a - * non-canonical return address, and SYSRET will explode dangerously. - * We avoid this particular problem by preventing anything executable - * from being mapped at the maximum canonical address. - * - * On AMD CPUs in the Ryzen family, there's a nasty bug in which the - * CPUs malfunction if they execute code from the highest canonical page. - * They'll speculate right off the end of the canonical space, and - * bad things happen. This is worked around in the same way as the - * Intel problem. - * - * With page table isolation enabled, we map the LDT in ... [stay tuned] - */ -#define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE) - -#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE) - -/* This decides where the kernel will search for a free chunk of vm - * space during mmap's. - */ -#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ - 0xc0000000 : 0xFFFFe000) - -#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \ - IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW) -#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \ - IA32_PAGE_OFFSET : TASK_SIZE_MAX) -#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? 
\ - IA32_PAGE_OFFSET : TASK_SIZE_MAX) - -#define STACK_TOP TASK_SIZE_LOW -#define STACK_TOP_MAX TASK_SIZE_MAX +extern unsigned long __top_init_kernel_stack[]; -#define INIT_THREAD { \ - .addr_limit = KERNEL_DS, \ +#define INIT_THREAD { \ + .sp = (unsigned long)&__top_init_kernel_stack, \ } -extern unsigned long KSTK_ESP(struct task_struct *task); - #endif /* CONFIG_X86_64 */ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, @@ -915,6 +678,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, #define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW) #define KSTK_EIP(task) (task_pt_regs(task)->ip) +#define KSTK_ESP(task) (task_pt_regs(task)->sp) /* Get/set a process' ability to use the timestamp counter instruction */ #define GET_TSC_CTL(adr) get_tsc_mode((adr)) @@ -925,50 +689,36 @@ extern int set_tsc_mode(unsigned int val); DECLARE_PER_CPU(u64, msr_misc_features_shadow); -/* Register/unregister a process' MPX related resource */ -#define MPX_ENABLE_MANAGEMENT() mpx_enable_management() -#define MPX_DISABLE_MANAGEMENT() mpx_disable_management() - -#ifdef CONFIG_X86_INTEL_MPX -extern int mpx_enable_management(void); -extern int mpx_disable_management(void); -#else -static inline int mpx_enable_management(void) +static inline u32 per_cpu_llc_id(unsigned int cpu) { - return -EINVAL; + return per_cpu(cpu_info.topo.llc_id, cpu); } -static inline int mpx_disable_management(void) + +static inline u32 per_cpu_l2c_id(unsigned int cpu) { - return -EINVAL; + return per_cpu(cpu_info.topo.l2c_id, cpu); } -#endif /* CONFIG_X86_INTEL_MPX */ #ifdef CONFIG_CPU_SUP_AMD -extern u16 amd_get_nb_id(int cpu); -extern u32 amd_get_nodes_per_socket(void); -#else -static inline u16 amd_get_nb_id(int cpu) { return 0; } -static inline u32 amd_get_nodes_per_socket(void) { return 0; } -#endif - -static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) +/* + * Issue a DIV 0/1 insn to clear any division data from previous DIV + * operations. 
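+ *
+ * Note (from the code below, not the changelog): the ALTERNATIVE patches
+ * the DIV in only when X86_BUG_DIV0 is set, so on unaffected CPUs this
+ * compiles down to a NOP.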
+ */ +static __always_inline void amd_clear_divider(void) { - uint32_t base, eax, signature[3]; - - for (base = 0x40000000; base < 0x40010000; base += 0x100) { - cpuid(base, &eax, &signature[0], &signature[1], &signature[2]); - - if (!memcmp(sig, signature, 12) && - (leaves == 0 || ((eax - base) >= leaves))) - return base; - } - - return 0; + asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0) + :: "a" (0), "d" (0), "r" (1)); } +extern void amd_check_microcode(void); +#else +static inline void amd_clear_divider(void) { } +static inline void amd_check_microcode(void) { } +#endif + extern unsigned long arch_align_stack(unsigned long sp); void free_init_pages(const char *what, unsigned long begin, unsigned long end); -extern void free_kernel_image_pages(void *begin, void *end); +extern void free_kernel_image_pages(const char *what, void *begin, void *end); void default_idle(void); #ifdef CONFIG_XEN @@ -977,12 +727,15 @@ bool xen_set_default_idle(void); #define xen_set_default_idle 0 #endif -void stop_this_cpu(void *dummy); -void df_debug(struct pt_regs *regs, long error_code); -void microcode_check(void); +void __noreturn stop_this_cpu(void *dummy); +void microcode_check(struct cpuinfo_x86 *prev_info); +void store_cpu_caps(struct cpuinfo_x86 *info); + +DECLARE_PER_CPU(bool, cache_state_incoherent); enum l1tf_mitigations { L1TF_MITIGATION_OFF, + L1TF_MITIGATION_AUTO, L1TF_MITIGATION_FLUSH_NOWARN, L1TF_MITIGATION_FLUSH, L1TF_MITIGATION_FLUSH_NOSMT, @@ -992,4 +745,31 @@ enum l1tf_mitigations { extern enum l1tf_mitigations l1tf_mitigation; +enum mds_mitigations { + MDS_MITIGATION_OFF, + MDS_MITIGATION_AUTO, + MDS_MITIGATION_FULL, + MDS_MITIGATION_VMWERV, +}; + +extern bool gds_ucode_mitigated(void); + +/* + * Make previous memory operations globally visible before + * a WRMSR. + * + * MFENCE makes writes visible, but only affects load/store + * instructions. WRMSR is unfortunately not a load/store + * instruction and is unaffected by MFENCE. The LFENCE ensures + * that the WRMSR is not reordered. + * + * Most WRMSRs are full serializing instructions themselves and + * do not require this barrier. This is only required for the + * IA32_TSC_DEADLINE and X2APIC MSRs. + */ +static inline void weak_wrmsr_fence(void) +{ + alternative("mfence; lfence", "", ALT_NOT(X86_FEATURE_APIC_MSRS_FENCE)); +} + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h index 1d081ac1cd69..5d0dbab85264 100644 --- a/arch/x86/include/asm/prom.h +++ b/arch/x86/include/asm/prom.h @@ -1,18 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Definitions for Device tree / OpenFirmware handling on X86 * * based on arch/powerpc/include/asm/prom.h which is * Copyright (C) 1996-2005 Paul Mackerras. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
*/ #ifndef _ASM_X86_PROM_H #define _ASM_X86_PROM_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/of.h> #include <linux/types.h> @@ -27,15 +23,15 @@ extern int of_ioapic; extern u64 initial_dtb; extern void add_dtb(u64 data); void x86_of_pci_init(void); -void x86_dtb_init(void); +void x86_flattree_get_config(void); #else static inline void add_dtb(u64 data) { } static inline void x86_of_pci_init(void) { } -static inline void x86_dtb_init(void) { } +static inline void x86_flattree_get_config(void) { } #define of_ioapic 0 #endif extern char cmd_line[COMMAND_LINE_SIZE]; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 6e81788a30c1..05224a695872 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -4,13 +4,17 @@ #include <asm/ldt.h> +struct task_struct; + /* misc architecture specific prototypes */ void syscall_init(void); #ifdef CONFIG_X86_64 void entry_SYSCALL_64(void); -long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2); +void entry_SYSCALL_64_safe_stack(void); +void entry_SYSRETQ_unsafe_stack(void); +void entry_SYSRETQ_end(void); #endif #ifdef CONFIG_X86_32 @@ -24,18 +28,18 @@ void __end_SYSENTER_singlestep_region(void); void entry_SYSENTER_compat(void); void __end_entry_SYSENTER_compat(void); void entry_SYSCALL_compat(void); -void entry_INT80_compat(void); -#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) -void xen_entry_INT80_compat(void); -#endif +void entry_SYSCALL_compat_safe_stack(void); +void entry_SYSRETL_compat_unsafe_stack(void); +void entry_SYSRETL_compat_end(void); +#else /* !CONFIG_IA32_EMULATION */ +#define entry_SYSCALL_compat NULL +#define entry_SYSENTER_compat NULL #endif void x86_configure_nx(void); -void x86_report_nx(void); extern int reboot_force; -long do_arch_prctl_common(struct task_struct *task, int option, - unsigned long cpuid_enabled); +long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2); #endif /* _ASM_X86_PROTO_H */ diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h index 5df09a0b80b8..88d0a1ab1f77 100644 --- a/arch/x86/include/asm/pti.h +++ b/arch/x86/include/asm/pti.h @@ -1,9 +1,9 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PTI_H #define _ASM_X86_PTI_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION extern void pti_init(void); extern void pti_check_boottime_disable(void); extern void pti_finalize(void); @@ -11,5 +11,5 @@ extern void pti_finalize(void); static inline void pti_check_boottime_disable(void) { } #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PTI_H */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 8a7fc0cca2d1..35d062a2e304 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -6,7 +6,7 @@ #include <asm/page_types.h> #include <uapi/asm/ptrace.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef __i386__ struct pt_regs { @@ -37,7 +37,10 @@ struct pt_regs { unsigned short __esh; unsigned short fs; unsigned short __fsh; - /* On interrupt, gs and __gsh store the vector number. */ + /* + * On interrupt, gs and __gsh store the vector number. They never + * store gs any more. + */ unsigned short gs; unsigned short __gsh; /* On interrupt, this is the error code. 
*/ @@ -53,18 +56,64 @@ struct pt_regs { #else /* __i386__ */ +struct fred_cs { + /* CS selector */ + u64 cs : 16, + /* Stack level at event time */ + sl : 2, + /* IBT in WAIT_FOR_ENDBRANCH state */ + wfe : 1, + : 45; +}; + +struct fred_ss { + /* SS selector */ + u64 ss : 16, + /* STI state */ + sti : 1, + /* Set if syscall, sysenter or INT n */ + swevent : 1, + /* Event is NMI type */ + nmi : 1, + : 13, + /* Event vector */ + vector : 8, + : 8, + /* Event type */ + type : 4, + : 4, + /* Event was incident to enclave execution */ + enclave : 1, + /* CPU was in 64-bit mode */ + l : 1, + /* + * Nested exception during FRED delivery, not set + * for #DF. + */ + nested : 1, + : 1, + /* + * The length of the instruction causing the event. + * Only set for INTO, INT1, INT3, INT n, SYSCALL + * and SYSENTER. 0 otherwise. + */ + insnlen : 4; +}; + struct pt_regs { -/* - * C ABI says these regs are callee-preserved. They aren't saved on kernel entry - * unless syscall needs a complete, fully filled "struct pt_regs". - */ + /* + * C ABI says these regs are callee-preserved. They aren't saved on + * kernel entry unless syscall needs a complete, fully filled + * "struct pt_regs". + */ unsigned long r15; unsigned long r14; unsigned long r13; unsigned long r12; unsigned long bp; unsigned long bx; -/* These regs are callee-clobbered. Always saved on kernel entry. */ + + /* These regs are callee-clobbered. Always saved on kernel entry. */ unsigned long r11; unsigned long r10; unsigned long r9; @@ -74,18 +123,50 @@ struct pt_regs { unsigned long dx; unsigned long si; unsigned long di; -/* - * On syscall entry, this is syscall#. On CPU exception, this is error code. - * On hw interrupt, it's IRQ number: - */ + + /* + * orig_ax is used on entry for: + * - the syscall number (syscall, sysenter, int80) + * - error_code stored by the CPU on traps and exceptions + * - the interrupt number for device interrupts + * + * A FRED stack frame starts here: + * 1) It _always_ includes an error code; + * + * 2) The return frame for ERET[US] starts here, but + * the content of orig_ax is ignored. + */ unsigned long orig_ax; -/* Return frame for iretq */ + + /* The IRETQ return frame starts here */ unsigned long ip; - unsigned long cs; + + union { + /* CS selector */ + u16 cs; + /* The extended 64-bit data slot containing CS */ + u64 csx; + /* The FRED CS extension */ + struct fred_cs fred_cs; + }; + unsigned long flags; unsigned long sp; - unsigned long ss; -/* top of stack page */ + + union { + /* SS selector */ + u16 ss; + /* The extended 64-bit data slot containing SS */ + u64 ssx; + /* The FRED SS extension */ + struct fred_ss fred_ss; + }; + + /* + * Top of stack on IDT systems, while FRED systems have extra fields + * defined above for storing exception related information, e.g. CR2 or + * DR6. 
+ */ }; #endif /* !__i386__ */ @@ -94,24 +175,24 @@ struct pt_regs { #include <asm/paravirt_types.h> #endif +#include <asm/proto.h> + struct cpuinfo_x86; struct task_struct; extern unsigned long profile_pc(struct pt_regs *regs); -#define profile_pc profile_pc extern unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); -extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, - int error_code, int si_code); +extern void send_sigtrap(struct pt_regs *regs, int error_code, int si_code); -static inline unsigned long regs_return_value(struct pt_regs *regs) +static __always_inline unsigned long regs_return_value(struct pt_regs *regs) { return regs->ax; } -static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +static __always_inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) { regs->ax = rc; } @@ -125,7 +206,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) * On x86_64, vm86 mode is mercifully nonexistent, and we don't need * the extra check. */ -static inline int user_mode(struct pt_regs *regs) +static __always_inline int user_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_32 return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= USER_RPL; @@ -134,7 +215,7 @@ static inline int user_mode(struct pt_regs *regs) #endif } -static inline int v8086_mode(struct pt_regs *regs) +static __always_inline int v8086_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_32 return (regs->flags & X86_VM_MASK); @@ -161,25 +242,77 @@ static inline bool user_64bit_mode(struct pt_regs *regs) #endif } +/* + * Determine whether the register set came from any context that is running in + * 64-bit mode. + */ +static inline bool any_64bit_mode(struct pt_regs *regs) +{ +#ifdef CONFIG_X86_64 + return !user_mode(regs) || user_64bit_mode(regs); +#else + return false; +#endif +} + #ifdef CONFIG_X86_64 #define current_user_stack_pointer() current_pt_regs()->sp #define compat_user_stack_pointer() current_pt_regs()->sp + +static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs) +{ + bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 && + regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack); + + ret = ret || (regs->ip >= (unsigned long)entry_SYSRETQ_unsafe_stack && + regs->ip < (unsigned long)entry_SYSRETQ_end); +#ifdef CONFIG_IA32_EMULATION + ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat && + regs->ip < (unsigned long)entry_SYSCALL_compat_safe_stack); + ret = ret || (regs->ip >= (unsigned long)entry_SYSRETL_compat_unsafe_stack && + regs->ip < (unsigned long)entry_SYSRETL_compat_end); #endif -#ifdef CONFIG_X86_32 -extern unsigned long kernel_stack_pointer(struct pt_regs *regs); -#else -static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) + return ret; +} +#endif + +static __always_inline unsigned long kernel_stack_pointer(struct pt_regs *regs) { return regs->sp; } -#endif -#define GET_IP(regs) ((regs)->ip) -#define GET_FP(regs) ((regs)->bp) -#define GET_USP(regs) ((regs)->sp) +static __always_inline unsigned long instruction_pointer(struct pt_regs *regs) +{ + return regs->ip; +} -#include <asm-generic/ptrace.h> +static __always_inline +void instruction_pointer_set(struct pt_regs *regs, unsigned long val) +{ + regs->ip = val; +} + +static __always_inline unsigned long frame_pointer(struct pt_regs *regs) +{ + return regs->bp; +} + +static __always_inline unsigned long user_stack_pointer(struct pt_regs *regs) +{ + return regs->sp; +} + 
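+/*
+ * Illustrative sketch, not part of this interface: with the unions above,
+ * FRED-aware code can pull the event metadata straight out of pt_regs.
+ * The example_* handlers here are hypothetical:
+ *
+ *	void example_fred_dispatch(struct pt_regs *regs)
+ *	{
+ *		struct fred_ss ss = regs->fred_ss;
+ *
+ *		if (ss.nmi)
+ *			example_handle_nmi(regs);
+ *		else if (ss.swevent)
+ *			example_handle_syscall(regs, ss.vector);
+ *		else
+ *			example_handle_event(regs, ss.type, ss.vector);
+ *	}
+ */
+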
+static __always_inline +void user_stack_pointer_set(struct pt_regs *regs, unsigned long val) +{ + regs->sp = val; +} + +static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) +{ + return !(regs->flags & X86_EFLAGS_IF); +} /* Query offset/name of register from its name/offset */ extern int regs_query_register_offset(const char *name); @@ -201,14 +334,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, if (unlikely(offset > MAX_REG_OFFSET)) return 0; #ifdef CONFIG_X86_32 - /* - * Traps from the kernel do not save sp and ss. - * Use the helper function to retrieve sp. - */ - if (offset == offsetof(struct pt_regs, sp) && - regs->cs == __KERNEL_CS) - return kernel_stack_pointer(regs); - /* The selector fields are 16-bit. */ if (offset == offsetof(struct pt_regs, cs) || offset == offsetof(struct pt_regs, ss) || @@ -234,8 +359,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, static inline int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) { - return ((addr & ~(THREAD_SIZE - 1)) == - (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); + return ((addr & ~(THREAD_SIZE - 1)) == (regs->sp & ~(THREAD_SIZE - 1))); } /** @@ -249,7 +373,7 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs, */ static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n) { - unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + unsigned long *addr = (unsigned long *)regs->sp; addr += n; if (regs_within_kernel_stack(regs, (unsigned long)addr)) @@ -259,7 +383,7 @@ static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs } /* To avoid include hell, we can't include uaccess.h */ -extern long probe_kernel_read(void *dst, const void *src, size_t size); +extern long copy_from_kernel_nofault(void *dst, const void *src, size_t size); /** * regs_get_kernel_stack_nth() - get Nth entry of the stack @@ -279,7 +403,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, addr = regs_get_kernel_stack_nth_addr(regs, n); if (addr) { - ret = probe_kernel_read(&val, addr, sizeof(val)); + ret = copy_from_kernel_nofault(&val, addr, sizeof(val)); if (!ret) return val; } @@ -303,8 +427,8 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, static const unsigned int argument_offs[] = { #ifdef __i386__ offsetof(struct pt_regs, ax), - offsetof(struct pt_regs, cx), offsetof(struct pt_regs, dx), + offsetof(struct pt_regs, cx), #define NR_REG_ARGUMENTS 3 #else offsetof(struct pt_regs, di), @@ -333,27 +457,17 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, #define ARCH_HAS_USER_SINGLE_STEP_REPORT -/* - * When hitting ptrace_stop(), we cannot return using SYSRET because - * that does not restore the full CPU state, only a minimal set. The - * ptracer can change arbitrary register values, which is usually okay - * because the usual ptrace stops run off the signal delivery path which - * forces IRET; however, ptrace_event() stops happen in arbitrary places - * in the kernel and don't force IRET path. - * - * So force IRET path after a ptrace stop. 
- */ -#define arch_ptrace_stop_needed(code, info) \ -({ \ - force_iret(); \ - false; \ -}) - struct user_desc; extern int do_get_thread_area(struct task_struct *p, int idx, struct user_desc __user *info); extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); -#endif /* !__ASSEMBLY__ */ +#ifdef CONFIG_X86_64 +# define do_set_thread_area_64(p, s, t) do_arch_prctl_64(p, s, t) +#else +# define do_set_thread_area_64(p, s, t) (0) +#endif + +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PTRACE_H */ diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h index 92c34e517da1..2fee5e9f1ccc 100644 --- a/arch/x86/include/asm/purgatory.h +++ b/arch/x86/include/asm/purgatory.h @@ -2,20 +2,10 @@ #ifndef _ASM_X86_PURGATORY_H #define _ASM_X86_PURGATORY_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/purgatory.h> extern void purgatory(void); -/* - * These forward declarations serve two purposes: - * - * 1) Make sparse happy when checking arch/purgatory - * 2) Document that these are required to be global so the symbol - * lookup in kexec works - */ -extern unsigned long purgatory_backup_dest; -extern unsigned long purgatory_backup_src; -extern unsigned long purgatory_backup_sz; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_PURGATORY_H */ diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h index 1436226efe3e..b9fece5fc96d 100644 --- a/arch/x86/include/asm/pvclock-abi.h +++ b/arch/x86/include/asm/pvclock-abi.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PVCLOCK_ABI_H #define _ASM_X86_PVCLOCK_ABI_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * These structs MUST NOT be changed. @@ -44,5 +44,5 @@ struct pvclock_wall_clock { #define PVCLOCK_GUEST_STOPPED (1 << 1) /* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */ #define PVCLOCK_COUNTS_FROM_ZERO (1 << 2) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PVCLOCK_ABI_H */ diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index b6033680d458..6e4f8fae3ce9 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -2,11 +2,13 @@ #ifndef _ASM_X86_PVCLOCK_H #define _ASM_X86_PVCLOCK_H -#include <linux/clocksource.h> +#include <asm/clocksource.h> #include <asm/pvclock-abi.h> +struct timespec64; /* some helper functions for xen and kvm pv clock sources */ u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); +u64 pvclock_clocksource_read_nowd(struct pvclock_vcpu_time_info *src); u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src); void pvclock_set_flags(u8 flags); unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); @@ -39,7 +41,7 @@ bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src, * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, * yielding a 64-bit result. 
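 *
 * In other words, result = ((delta << shift) * mul_frac) >> 32, where a
 * negative shift pre-shifts delta right instead; mul_frac acts as a 0.32
 * fixed-point multiplier.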
 */
-static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
+static __always_inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
 {
 	u64 product;
 #ifdef __i386__
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index bd5ac6cc37db..68da67df304d 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -32,6 +32,7 @@ extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
 extern void __pv_init_lock_hash(void);
 extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
 extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
+extern bool nopvspin;
 
 #define	queued_spin_unlock queued_spin_unlock
 /**
@@ -52,6 +53,7 @@ static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 
 static inline void queued_spin_unlock(struct qspinlock *lock)
 {
+	kcsan_release();
 	pv_queued_spin_unlock(lock);
 }
 
@@ -63,13 +65,30 @@ static inline bool vcpu_is_preempted(long cpu)
 #endif
 
 #ifdef CONFIG_PARAVIRT
-DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key);
-
-void native_pv_lock_init(void) __init;
+/*
+ * virt_spin_lock_key - disables the virt_spin_lock() hijack by default.
+ *
+ * Native (and PV wanting native due to vCPU pinning) should keep this key
+ * disabled. Native does not touch the key.
+ *
+ * When in a guest then native_pv_lock_init() enables the key first and
+ * KVM/XEN might conditionally disable it later in the boot process again.
+ */
+DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);
+
+/*
+ * Shortcut for the queued_spin_lock_slowpath() function that allows
+ * virt to hijack it.
+ *
+ * Returns:
+ *   true - lock has been negotiated, all done;
+ *   false - queued_spin_lock_slowpath() will do its thing.
+ */
 #define virt_spin_lock virt_spin_lock
 static inline bool virt_spin_lock(struct qspinlock *lock)
 {
+	int val;
+
 	if (!static_branch_likely(&virt_spin_lock_key))
 		return false;
 
@@ -79,17 +98,17 @@ static inline bool virt_spin_lock(struct qspinlock *lock)
 	 * horrible lock 'holder' preemption issues.
 	 */
 
-	do {
-		while (atomic_read(&lock->val) != 0)
-			cpu_relax();
-	} while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0);
+ __retry:
+	val = atomic_read(&lock->val);
+
+	if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) {
+		cpu_relax();
+		goto __retry;
+	}
 
 	return true;
 }
-#else
-static inline void native_pv_lock_init(void)
-{
-}
+
 #endif /* CONFIG_PARAVIRT */
 
 #include <asm-generic/qspinlock.h>
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index 159622ee0674..0a985784be9b 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -2,6 +2,10 @@
 #ifndef __ASM_QSPINLOCK_PARAVIRT_H
 #define __ASM_QSPINLOCK_PARAVIRT_H
 
+#include <asm/ibt.h>
+
+void __lockfunc __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked);
+
 /*
  * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit
  * registers. For i386, however, only 1 32-bit register needs to be saved
@@ -10,21 +14,20 @@
  */
 #ifdef CONFIG_64BIT
 
-PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
+__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
 #define __pv_queued_spin_unlock	__pv_queued_spin_unlock
-#define PV_UNLOCK		"__raw_callee_save___pv_queued_spin_unlock"
-#define PV_UNLOCK_SLOWPATH	"__raw_callee_save___pv_queued_spin_unlock_slowpath"
 
 /*
  * Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock
  * which combines the register saving thunk and the body of the following
- * C code:
+ * C code. Note that it puts the code in the .spinlock.text section which
+ * is equivalent to adding __lockfunc in the C code:
  *
- * void __pv_queued_spin_unlock(struct qspinlock *lock)
+ * void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock)
  * {
- *	u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0);
+ *	u8 lockval = _Q_LOCKED_VAL;
  *
- *	if (likely(lockval == _Q_LOCKED_VAL))
+ *	if (try_cmpxchg(&lock->locked, &lockval, 0))
  *		return;
  *	pv_queued_spin_unlock_slowpath(lock, lockval);
  * }
@@ -34,36 +37,31 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
  *  rsi = lockval           (second argument)
  *  rdx = internal variable (set to 0)
  */
-asm    (".pushsection .text;"
-	".globl " PV_UNLOCK ";"
-	".type " PV_UNLOCK ", @function;"
-	".align 4,0x90;"
-	PV_UNLOCK ": "
-	FRAME_BEGIN
-	"push  %rdx;"
-	"mov   $0x1,%eax;"
-	"xor   %edx,%edx;"
-	LOCK_PREFIX "cmpxchg %dl,(%rdi);"
-	"cmp   $0x1,%al;"
-	"jne   .slowpath;"
-	"pop   %rdx;"
+#define PV_UNLOCK_ASM							\
+	FRAME_BEGIN							\
+	"push  %rdx\n\t"						\
+	"mov   $" __stringify(_Q_LOCKED_VAL) ",%eax\n\t"		\
+	"xor   %edx,%edx\n\t"						\
+	LOCK_PREFIX "cmpxchg %dl,(%rdi)\n\t"				\
+	"jne   .slowpath\n\t"						\
+	"pop   %rdx\n\t"						\
+	FRAME_END							\
+	ASM_RET								\
+	".slowpath:\n\t"						\
+	"push   %rsi\n\t"						\
+	"movzbl %al,%esi\n\t"						\
+	"call __raw_callee_save___pv_queued_spin_unlock_slowpath\n\t"	\
+	"pop    %rsi\n\t"						\
+	"pop    %rdx\n\t"						\
 	FRAME_END
-	"ret;"
-	".slowpath: "
-	"push   %rsi;"
-	"movzbl %al,%esi;"
-	"call " PV_UNLOCK_SLOWPATH ";"
-	"pop    %rsi;"
-	"pop    %rdx;"
-	FRAME_END
-	"ret;"
-	".size " PV_UNLOCK ", .-" PV_UNLOCK ";"
-	".popsection");
+
+DEFINE_ASM_FUNC(__raw_callee_save___pv_queued_spin_unlock,
+		PV_UNLOCK_ASM, .spinlock.text);
 
 #else /* CONFIG_64BIT */
 
-extern void __pv_queued_spin_unlock(struct qspinlock *lock);
-PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock);
+extern void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock);
+__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock, ".spinlock.text");
 
 #endif /* CONFIG_64BIT */
 #endif
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 63b3393bd98e..e406a1e92c63 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -9,20 +9,23 @@
 #define TH_FLAGS_SME_ACTIVE_BIT	0
 #define TH_FLAGS_SME_ACTIVE	BIT(TH_FLAGS_SME_ACTIVE_BIT)
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 #include <linux/types.h>
 #include <asm/io.h>
 
-/* This must match data at realmode.S */
+/* This must match data at realmode/rm/header.S */
 struct real_mode_header {
 	u32	text_start;
 	u32	ro_end;
 	/* SMP trampoline */
 	u32	trampoline_start;
-	u32	trampoline_status;
 	u32	trampoline_header;
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+	u32	sev_es_trampoline_start;
+#endif
 #ifdef CONFIG_X86_64
+	u32	trampoline_start64;
 	u32	trampoline_pgd;
 #endif
 	/* ACPI S3 wakeup */
@@ -37,7 +40,7 @@ struct real_mode_header {
 #endif
 };
 
-/* This must match data at trampoline_32/64.S */
+/* This must match data at
realmode/rm/trampoline_{32,64}.S */ struct trampoline_header { #ifdef CONFIG_X86_32 u32 start; @@ -49,6 +52,7 @@ struct trampoline_header { u64 efer; u32 cr4; u32 flags; + u32 lock; #endif }; @@ -56,8 +60,12 @@ extern struct real_mode_header *real_mode_header; extern unsigned char real_mode_blob_end[]; extern unsigned long initial_code; -extern unsigned long initial_gs; extern unsigned long initial_stack; +#ifdef CONFIG_AMD_MEM_ENCRYPT +extern unsigned long initial_vc_handler; +#endif + +extern u32 *trampoline_lock; extern unsigned char real_mode_blob[]; extern unsigned char real_mode_relocs[]; @@ -67,9 +75,10 @@ extern unsigned char startup_32_smp[]; extern unsigned char boot_gdt[]; #else extern unsigned char secondary_startup_64[]; +extern unsigned char secondary_startup_64_no_verify[]; #endif -static inline size_t real_mode_size_needed(void) +static __always_inline size_t real_mode_size_needed(void) { if (real_mode_header) return 0; /* already allocated. */ @@ -77,9 +86,15 @@ static inline size_t real_mode_size_needed(void) return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE); } -void set_real_mode_mem(phys_addr_t mem, size_t size); +static inline void set_real_mode_mem(phys_addr_t mem) +{ + real_mode_header = (struct real_mode_header *) __va(mem); +} + void reserve_real_mode(void); +void load_trampoline_pgtable(void); +void init_real_mode(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ARCH_X86_REALMODE_H */ diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 04c17be9b5fd..ecd58ea9a837 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -25,8 +25,18 @@ void __noreturn machine_real_restart(unsigned int type); #define MRR_BIOS 0 #define MRR_APM 1 +typedef void (cpu_emergency_virt_cb)(void); +#if IS_ENABLED(CONFIG_KVM_X86) +void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback); +void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback); +void cpu_emergency_disable_virtualization(void); +#else +static inline void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) {} +static inline void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) {} +static inline void cpu_emergency_disable_virtualization(void) {} +#endif /* CONFIG_KVM_X86 */ + typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); -void nmi_panic_self_stop(struct pt_regs *regs); void nmi_shootdown_cpus(nmi_shootdown_cb callback); void run_crash_ipi_callback(struct pt_regs *regs); diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h deleted file mode 100644 index dbaed55c1c24..000000000000 --- a/arch/x86/include/asm/refcount.h +++ /dev/null @@ -1,112 +0,0 @@ -#ifndef __ASM_X86_REFCOUNT_H -#define __ASM_X86_REFCOUNT_H -/* - * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from - * PaX/grsecurity. - */ -#include <linux/refcount.h> -#include <asm/bug.h> - -/* - * This is the first portion of the refcount error handling, which lives in - * .text.unlikely, and is jumped to from the CPU flag check (in the - * following macros). This saves the refcount value location into CX for - * the exception handler to use (in mm/extable.c), and then triggers the - * central refcount exception. The fixup address for the exception points - * back to the regular execution flow in .text. 
- */ -#define _REFCOUNT_EXCEPTION \ - ".pushsection .text..refcount\n" \ - "111:\tlea %[var], %%" _ASM_CX "\n" \ - "112:\t" ASM_UD2 "\n" \ - ASM_UNREACHABLE \ - ".popsection\n" \ - "113:\n" \ - _ASM_EXTABLE_REFCOUNT(112b, 113b) - -/* Trigger refcount exception if refcount result is negative. */ -#define REFCOUNT_CHECK_LT_ZERO \ - "js 111f\n\t" \ - _REFCOUNT_EXCEPTION - -/* Trigger refcount exception if refcount result is zero or negative. */ -#define REFCOUNT_CHECK_LE_ZERO \ - "jz 111f\n\t" \ - REFCOUNT_CHECK_LT_ZERO - -/* Trigger refcount exception unconditionally. */ -#define REFCOUNT_ERROR \ - "jmp 111f\n\t" \ - _REFCOUNT_EXCEPTION - -static __always_inline void refcount_add(unsigned int i, refcount_t *r) -{ - asm volatile(LOCK_PREFIX "addl %1,%0\n\t" - REFCOUNT_CHECK_LT_ZERO - : [var] "+m" (r->refs.counter) - : "ir" (i) - : "cc", "cx"); -} - -static __always_inline void refcount_inc(refcount_t *r) -{ - asm volatile(LOCK_PREFIX "incl %0\n\t" - REFCOUNT_CHECK_LT_ZERO - : [var] "+m" (r->refs.counter) - : : "cc", "cx"); -} - -static __always_inline void refcount_dec(refcount_t *r) -{ - asm volatile(LOCK_PREFIX "decl %0\n\t" - REFCOUNT_CHECK_LE_ZERO - : [var] "+m" (r->refs.counter) - : : "cc", "cx"); -} - -static __always_inline __must_check -bool refcount_sub_and_test(unsigned int i, refcount_t *r) -{ - return GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", - REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, e, "er", i, "cx"); -} - -static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r) -{ - return GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", - REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, e, "cx"); -} - -static __always_inline __must_check -bool refcount_add_not_zero(unsigned int i, refcount_t *r) -{ - int c, result; - - c = atomic_read(&(r->refs)); - do { - if (unlikely(c == 0)) - return false; - - result = c + i; - - /* Did we try to increment from/to an undesirable state? */ - if (unlikely(c < 0 || c == INT_MAX || result < c)) { - asm volatile(REFCOUNT_ERROR - : : [var] "m" (r->refs.counter) - : "cc", "cx"); - break; - } - - } while (!atomic_try_cmpxchg(&(r->refs), &c, result)); - - return c != 0; -} - -static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r) -{ - return refcount_add_not_zero(1, r); -} - -#endif diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h deleted file mode 100644 index 6847d85400a8..000000000000 --- a/arch/x86/include/asm/required-features.h +++ /dev/null @@ -1,106 +0,0 @@ -#ifndef _ASM_X86_REQUIRED_FEATURES_H -#define _ASM_X86_REQUIRED_FEATURES_H - -/* Define minimum CPUID feature set for kernel These bits are checked - really early to actually display a visible error message before the - kernel dies. Make sure to assign features to the proper mask! - - Some requirements that are not in CPUID yet are also in the - CONFIG_X86_MINIMUM_CPU_FAMILY which is checked too. 
- - The real information is in arch/x86/Kconfig.cpu, this just converts - the CONFIGs into a bitmask */ - -#ifndef CONFIG_MATH_EMULATION -# define NEED_FPU (1<<(X86_FEATURE_FPU & 31)) -#else -# define NEED_FPU 0 -#endif - -#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) -# define NEED_PAE (1<<(X86_FEATURE_PAE & 31)) -#else -# define NEED_PAE 0 -#endif - -#ifdef CONFIG_X86_CMPXCHG64 -# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31)) -#else -# define NEED_CX8 0 -#endif - -#if defined(CONFIG_X86_CMOV) || defined(CONFIG_X86_64) -# define NEED_CMOV (1<<(X86_FEATURE_CMOV & 31)) -#else -# define NEED_CMOV 0 -#endif - -#ifdef CONFIG_X86_USE_3DNOW -# define NEED_3DNOW (1<<(X86_FEATURE_3DNOW & 31)) -#else -# define NEED_3DNOW 0 -#endif - -#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64) -# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31)) -#else -# define NEED_NOPL 0 -#endif - -#ifdef CONFIG_MATOM -# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31)) -#else -# define NEED_MOVBE 0 -#endif - -#ifdef CONFIG_X86_64 -#ifdef CONFIG_PARAVIRT -/* Paravirtualized systems may not have PSE or PGE available */ -#define NEED_PSE 0 -#define NEED_PGE 0 -#else -#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31) -#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31) -#endif -#define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) -#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) -#define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) -#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) -#define NEED_LM (1<<(X86_FEATURE_LM & 31)) -#else -#define NEED_PSE 0 -#define NEED_MSR 0 -#define NEED_PGE 0 -#define NEED_FXSR 0 -#define NEED_XMM 0 -#define NEED_XMM2 0 -#define NEED_LM 0 -#endif - -#define REQUIRED_MASK0 (NEED_FPU|NEED_PSE|NEED_MSR|NEED_PAE|\ - NEED_CX8|NEED_PGE|NEED_FXSR|NEED_CMOV|\ - NEED_XMM|NEED_XMM2) -#define SSE_MASK (NEED_XMM|NEED_XMM2) - -#define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW) - -#define REQUIRED_MASK2 0 -#define REQUIRED_MASK3 (NEED_NOPL) -#define REQUIRED_MASK4 (NEED_MOVBE) -#define REQUIRED_MASK5 0 -#define REQUIRED_MASK6 0 -#define REQUIRED_MASK7 0 -#define REQUIRED_MASK8 0 -#define REQUIRED_MASK9 0 -#define REQUIRED_MASK10 0 -#define REQUIRED_MASK11 0 -#define REQUIRED_MASK12 0 -#define REQUIRED_MASK13 0 -#define REQUIRED_MASK14 0 -#define REQUIRED_MASK15 0 -#define REQUIRED_MASK16 0 -#define REQUIRED_MASK17 0 -#define REQUIRED_MASK18 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) - -#endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h new file mode 100644 index 000000000000..575f8408a9e7 --- /dev/null +++ b/arch/x86/include/asm/resctrl.h @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_RESCTRL_H +#define _ASM_X86_RESCTRL_H + +#ifdef CONFIG_X86_CPU_RESCTRL + +#include <linux/jump_label.h> +#include <linux/percpu.h> +#include <linux/resctrl_types.h> +#include <linux/sched.h> + +#include <asm/msr.h> + +/* + * This value can never be a valid CLOSID, and is used when mapping a + * (closid, rmid) pair to an index and back. On x86 only the RMID is + * needed. The index is a software defined value. + */ +#define X86_RESCTRL_EMPTY_CLOSID ((u32)~0) + +/** + * struct resctrl_pqr_state - State cache for the PQR MSR + * @cur_rmid: The cached Resource Monitoring ID + * @cur_closid: The cached Class Of Service ID + * @default_rmid: The user assigned Resource Monitoring ID + * @default_closid: The user assigned cached Class Of Service ID + * + * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the + * lower 10 bits rmid. 
The update to MSR_IA32_PQR_ASSOC always + * contains both parts, so we need to cache them. This also + * stores the user configured per cpu CLOSID and RMID. + * + * The cache also helps to avoid pointless updates if the value does + * not change. + */ +struct resctrl_pqr_state { + u32 cur_rmid; + u32 cur_closid; + u32 default_rmid; + u32 default_closid; +}; + +DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state); + +extern bool rdt_alloc_capable; +extern bool rdt_mon_capable; + +DECLARE_STATIC_KEY_FALSE(rdt_enable_key); +DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key); +DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key); + +static inline bool resctrl_arch_alloc_capable(void) +{ + return rdt_alloc_capable; +} + +static inline void resctrl_arch_enable_alloc(void) +{ + static_branch_enable_cpuslocked(&rdt_alloc_enable_key); + static_branch_inc_cpuslocked(&rdt_enable_key); +} + +static inline void resctrl_arch_disable_alloc(void) +{ + static_branch_disable_cpuslocked(&rdt_alloc_enable_key); + static_branch_dec_cpuslocked(&rdt_enable_key); +} + +static inline bool resctrl_arch_mon_capable(void) +{ + return rdt_mon_capable; +} + +static inline void resctrl_arch_enable_mon(void) +{ + static_branch_enable_cpuslocked(&rdt_mon_enable_key); + static_branch_inc_cpuslocked(&rdt_enable_key); +} + +static inline void resctrl_arch_disable_mon(void) +{ + static_branch_disable_cpuslocked(&rdt_mon_enable_key); + static_branch_dec_cpuslocked(&rdt_enable_key); +} + +/* + * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR + * + * Following considerations are made so that this has minimal impact + * on scheduler hot path: + * - This will stay as no-op unless we are running on an Intel SKU + * which supports resource control or monitoring and we enable by + * mounting the resctrl file system. + * - Caches the per cpu CLOSid/RMID values and does the MSR write only + * when a task with a different CLOSid/RMID is scheduled in. + * - We allocate RMIDs/CLOSids globally in order to keep this as + * simple as possible. + * Must be called with preemption disabled. + */ +static inline void __resctrl_sched_in(struct task_struct *tsk) +{ + struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state); + u32 closid = READ_ONCE(state->default_closid); + u32 rmid = READ_ONCE(state->default_rmid); + u32 tmp; + + /* + * If this task has a closid/rmid assigned, use it. + * Else use the closid/rmid assigned to this cpu. 
+ */ + if (static_branch_likely(&rdt_alloc_enable_key)) { + tmp = READ_ONCE(tsk->closid); + if (tmp) + closid = tmp; + } + + if (static_branch_likely(&rdt_mon_enable_key)) { + tmp = READ_ONCE(tsk->rmid); + if (tmp) + rmid = tmp; + } + + if (closid != state->cur_closid || rmid != state->cur_rmid) { + state->cur_closid = closid; + state->cur_rmid = rmid; + wrmsr(MSR_IA32_PQR_ASSOC, rmid, closid); + } +} + +static inline unsigned int resctrl_arch_round_mon_val(unsigned int val) +{ + unsigned int scale = boot_cpu_data.x86_cache_occ_scale; + + /* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */ + val /= scale; + return val * scale; +} + +static inline void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, + u32 rmid) +{ + WRITE_ONCE(per_cpu(pqr_state.default_closid, cpu), closid); + WRITE_ONCE(per_cpu(pqr_state.default_rmid, cpu), rmid); +} + +static inline void resctrl_arch_set_closid_rmid(struct task_struct *tsk, + u32 closid, u32 rmid) +{ + WRITE_ONCE(tsk->closid, closid); + WRITE_ONCE(tsk->rmid, rmid); +} + +static inline bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid) +{ + return READ_ONCE(tsk->closid) == closid; +} + +static inline bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 ignored, + u32 rmid) +{ + return READ_ONCE(tsk->rmid) == rmid; +} + +static inline void resctrl_arch_sched_in(struct task_struct *tsk) +{ + if (static_branch_likely(&rdt_enable_key)) + __resctrl_sched_in(tsk); +} + +static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid) +{ + *rmid = idx; + *closid = X86_RESCTRL_EMPTY_CLOSID; +} + +static inline u32 resctrl_arch_rmid_idx_encode(u32 ignored, u32 rmid) +{ + return rmid; +} + +/* x86 can always read an rmid, nothing needs allocating */ +struct rdt_resource; +static inline void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, + enum resctrl_event_id evtid) +{ + might_sleep(); + return NULL; +} + +static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, + enum resctrl_event_id evtid, + void *ctx) { } + +void resctrl_cpu_detect(struct cpuinfo_x86 *c); + +#else + +static inline void resctrl_arch_sched_in(struct task_struct *tsk) {} +static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {} + +#endif /* CONFIG_X86_CPU_RESCTRL */ + +#endif /* _ASM_X86_RESCTRL_H */ diff --git a/arch/x86/include/asm/resctrl_sched.h b/arch/x86/include/asm/resctrl_sched.h deleted file mode 100644 index 40ebddde6ac2..000000000000 --- a/arch/x86/include/asm/resctrl_sched.h +++ /dev/null @@ -1,93 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_RESCTRL_SCHED_H -#define _ASM_X86_RESCTRL_SCHED_H - -#ifdef CONFIG_X86_RESCTRL - -#include <linux/sched.h> -#include <linux/jump_label.h> - -#define IA32_PQR_ASSOC 0x0c8f - -/** - * struct resctrl_pqr_state - State cache for the PQR MSR - * @cur_rmid: The cached Resource Monitoring ID - * @cur_closid: The cached Class Of Service ID - * @default_rmid: The user assigned Resource Monitoring ID - * @default_closid: The user assigned cached Class Of Service ID - * - * The upper 32 bits of IA32_PQR_ASSOC contain closid and the - * lower 10 bits rmid. The update to IA32_PQR_ASSOC always - * contains both parts, so we need to cache them. This also - * stores the user configured per cpu CLOSID and RMID. - * - * The cache also helps to avoid pointless updates if the value does - * not change. 
- */ -struct resctrl_pqr_state { - u32 cur_rmid; - u32 cur_closid; - u32 default_rmid; - u32 default_closid; -}; - -DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state); - -DECLARE_STATIC_KEY_FALSE(rdt_enable_key); -DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key); -DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key); - -/* - * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR - * - * Following considerations are made so that this has minimal impact - * on scheduler hot path: - * - This will stay as no-op unless we are running on an Intel SKU - * which supports resource control or monitoring and we enable by - * mounting the resctrl file system. - * - Caches the per cpu CLOSid/RMID values and does the MSR write only - * when a task with a different CLOSid/RMID is scheduled in. - * - We allocate RMIDs/CLOSids globally in order to keep this as - * simple as possible. - * Must be called with preemption disabled. - */ -static void __resctrl_sched_in(void) -{ - struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state); - u32 closid = state->default_closid; - u32 rmid = state->default_rmid; - - /* - * If this task has a closid/rmid assigned, use it. - * Else use the closid/rmid assigned to this cpu. - */ - if (static_branch_likely(&rdt_alloc_enable_key)) { - if (current->closid) - closid = current->closid; - } - - if (static_branch_likely(&rdt_mon_enable_key)) { - if (current->rmid) - rmid = current->rmid; - } - - if (closid != state->cur_closid || rmid != state->cur_rmid) { - state->cur_closid = closid; - state->cur_rmid = rmid; - wrmsr(IA32_PQR_ASSOC, rmid, closid); - } -} - -static inline void resctrl_sched_in(void) -{ - if (static_branch_likely(&rdt_enable_key)) - __resctrl_sched_in(); -} - -#else - -static inline void resctrl_sched_in(void) {} - -#endif /* CONFIG_X86_RESCTRL */ - -#endif /* _ASM_X86_RESCTRL_SCHED_H */ diff --git a/arch/x86/include/asm/rio.h b/arch/x86/include/asm/rio.h deleted file mode 100644 index 0a21986d2238..000000000000 --- a/arch/x86/include/asm/rio.h +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Derived from include/asm-x86/mach-summit/mach_mpparse.h - * and include/asm-x86/mach-default/bios_ebda.h - * - * Author: Laurent Vivier <Laurent.Vivier@bull.net> - */ - -#ifndef _ASM_X86_RIO_H -#define _ASM_X86_RIO_H - -#define RIO_TABLE_VERSION 3 - -struct rio_table_hdr { - u8 version; /* Version number of this data structure */ - u8 num_scal_dev; /* # of Scalability devices */ - u8 num_rio_dev; /* # of RIO I/O devices */ -} __attribute__((packed)); - -struct scal_detail { - u8 node_id; /* Scalability Node ID */ - u32 CBAR; /* Address of 1MB register space */ - u8 port0node; /* Node ID port connected to: 0xFF=None */ - u8 port0port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 port1node; /* Node ID port connected to: 0xFF = None */ - u8 port1port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 port2node; /* Node ID port connected to: 0xFF = None */ - u8 port2port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 chassis_num; /* 1 based Chassis number (1 = boot node) */ -} __attribute__((packed)); - -struct rio_detail { - u8 node_id; /* RIO Node ID */ - u32 BBAR; /* Address of 1MB register space */ - u8 type; /* Type of device */ - u8 owner_id; /* Node ID of Hurricane that owns this */ - /* node */ - u8 port0node; /* Node ID port connected to: 0xFF=None */ - u8 port0port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 port1node; /* Node ID port 
connected to: 0xFF=None */ - u8 port1port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 first_slot; /* Lowest slot number below this Calgary */ - u8 status; /* Bit 0 = 1 : the XAPIC is used */ - /* = 0 : the XAPIC is not used, ie: */ - /* ints fwded to another XAPIC */ - /* Bits1:7 Reserved */ - u8 WP_index; /* instance index - lower ones have */ - /* lower slot numbers/PCI bus numbers */ - u8 chassis_num; /* 1 based Chassis number */ -} __attribute__((packed)); - -enum { - HURR_SCALABILTY = 0, /* Hurricane Scalability info */ - HURR_RIOIB = 2, /* Hurricane RIOIB info */ - COMPAT_CALGARY = 4, /* Compatibility Calgary */ - ALT_CALGARY = 5, /* Second Planar Calgary */ -}; - -#endif /* _ASM_X86_RIO_H */ diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 8a9eba191516..54c8fc430684 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -2,53 +2,26 @@ #ifndef _ASM_X86_RMWcc #define _ASM_X86_RMWcc -/* This counts to 12. Any more, it will return 13th argument. */ -#define __RMWcc_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n -#define RMWcc_ARGS(X...) __RMWcc_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) - -#define __RMWcc_CONCAT(a, b) a ## b -#define RMWcc_CONCAT(a, b) __RMWcc_CONCAT(a, b) +#include <linux/args.h> #define __CLOBBERS_MEM(clb...) "memory", ## clb -#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CONFIG_CC_HAS_ASM_GOTO) - -/* Use asm goto */ - -#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \ -({ \ - bool c = false; \ - asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \ - : : [var] "m" (_var), ## __VA_ARGS__ \ - : clobbers : cc_label); \ - if (0) { \ -cc_label: c = true; \ - } \ - c; \ -}) - -#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CONFIG_CC_HAS_ASM_GOTO) */ - -/* Use flags output or a set instruction */ - #define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \ ({ \ bool c; \ - asm volatile (fullop CC_SET(cc) \ - : [var] "+m" (_var), CC_OUT(cc) (c) \ + asm_inline volatile (fullop \ + : [var] "+m" (_var), "=@cc" #cc (c) \ : __VA_ARGS__ : clobbers); \ c; \ }) -#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CONFIG_CC_HAS_ASM_GOTO) */ - #define GEN_UNARY_RMWcc_4(op, var, cc, arg0) \ __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM()) #define GEN_UNARY_RMWcc_3(op, var, cc) \ GEN_UNARY_RMWcc_4(op, var, cc, "%[var]") -#define GEN_UNARY_RMWcc(X...) RMWcc_CONCAT(GEN_UNARY_RMWcc_, RMWcc_ARGS(X))(X) +#define GEN_UNARY_RMWcc(X...) CONCATENATE(GEN_UNARY_RMWcc_, COUNT_ARGS(X))(X) #define GEN_BINARY_RMWcc_6(op, var, cc, vcon, _val, arg0) \ __GEN_RMWcc(op " %[val], " arg0, var, cc, \ @@ -57,7 +30,7 @@ cc_label: c = true; \ #define GEN_BINARY_RMWcc_5(op, var, cc, vcon, val) \ GEN_BINARY_RMWcc_6(op, var, cc, vcon, val, "%[var]") -#define GEN_BINARY_RMWcc(X...) RMWcc_CONCAT(GEN_BINARY_RMWcc_, RMWcc_ARGS(X))(X) +#define GEN_BINARY_RMWcc(X...) CONCATENATE(GEN_BINARY_RMWcc_, COUNT_ARGS(X))(X) #define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, cc, clobbers...) 
\ __GEN_RMWcc(op " %[var]\n\t" suffix, var, cc, \ diff --git a/arch/x86/include/asm/rqspinlock.h b/arch/x86/include/asm/rqspinlock.h new file mode 100644 index 000000000000..24a885449ee6 --- /dev/null +++ b/arch/x86/include/asm/rqspinlock.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_RQSPINLOCK_H +#define _ASM_X86_RQSPINLOCK_H + +#include <asm/paravirt.h> + +#ifdef CONFIG_PARAVIRT +DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key); + +#define resilient_virt_spin_lock_enabled resilient_virt_spin_lock_enabled +static __always_inline bool resilient_virt_spin_lock_enabled(void) +{ + return static_branch_likely(&virt_spin_lock_key); +} + +#ifdef CONFIG_QUEUED_SPINLOCKS +typedef struct qspinlock rqspinlock_t; +#else +typedef struct rqspinlock rqspinlock_t; +#endif +extern int resilient_tas_spin_lock(rqspinlock_t *lock); + +#define resilient_virt_spin_lock resilient_virt_spin_lock +static inline int resilient_virt_spin_lock(rqspinlock_t *lock) +{ + return resilient_tas_spin_lock(lock); +} + +#endif /* CONFIG_PARAVIRT */ + +#include <asm-generic/rqspinlock.h> + +#endif /* _ASM_X86_RQSPINLOCK_H */ diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h new file mode 100644 index 000000000000..e5a13dc8816e --- /dev/null +++ b/arch/x86/include/asm/runtime-const.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RUNTIME_CONST_H +#define _ASM_RUNTIME_CONST_H + +#ifdef MODULE + #error "Cannot use runtime-const infrastructure from modules" +#endif + +#ifdef __ASSEMBLY__ + +.macro RUNTIME_CONST_PTR sym reg + movq $0x0123456789abcdef, %\reg + 1: + .pushsection runtime_ptr_\sym, "a" + .long 1b - 8 - . + .popsection +.endm + +#else /* __ASSEMBLY__ */ + +#define runtime_const_ptr(sym) ({ \ + typeof(sym) __ret; \ + asm_inline("mov %1,%0\n1:\n" \ + ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \ + ".long 1b - %c2 - .\n" \ + ".popsection" \ + :"=r" (__ret) \ + :"i" ((unsigned long)0x0123456789abcdefull), \ + "i" (sizeof(long))); \ + __ret; }) + +// The 'typeof' will create at _least_ a 32-bit type, but +// will happily also take a bigger type and the 'shrl' will +// clear the upper bits +#define runtime_const_shift_right_32(val, sym) ({ \ + typeof(0u+(val)) __ret = (val); \ + asm_inline("shrl $12,%k0\n1:\n" \ + ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \ + ".long 1b - 1 - .\n" \ + ".popsection" \ + :"+r" (__ret)); \ + __ret; }) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +/* + * The text patching is trivial - you can only do this at init time, + * when the text section hasn't been marked RO, and before the text + * has ever been executed. 
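+ *
+ * That is also why the plain stores below suffice: nothing else can
+ * observe or execute the text yet, so no text_poke()-style
+ * synchronization is needed.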
+ */ +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ + *(unsigned long *)where = val; +} + +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + *(unsigned char *)where = val; +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif /* __ASSEMBLY__ */ +#endif diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h deleted file mode 100644 index 4c25cf6caefa..000000000000 --- a/arch/x86/include/asm/rwsem.h +++ /dev/null @@ -1,237 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for i486+ - * - * Written by David Howells (dhowells@redhat.com). - * - * Derived from asm-x86/semaphore.h - * - * - * The MSW of the count is the negated number of active writers and waiting - * lockers, and the LSW is the total number of active locks - * - * The lock count is initialized to 0 (no active and no waiting lockers). - * - * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an - * uncontended lock. This can be determined because XADD returns the old value. - * Readers increment by 1 and see a positive value when uncontended, negative - * if there are writers (and maybe) readers waiting (in which case it goes to - * sleep). - * - * The value of WAITING_BIAS supports up to 32766 waiting processes. This can - * be extended to 65534 by manually checking the whole MSW rather than relying - * on the S flag. - * - * The value of ACTIVE_BIAS supports up to 65535 active processes. - * - * This should be totally fair - if anything is waiting, a process that wants a - * lock will go to the back of the queue. When the currently active lock is - * released, if there's a writer at the front of the queue, then that and only - * that will be woken up; if there's a bunch of consecutive readers at the - * front, then they'll all be woken up, but no other readers will be. - */ - -#ifndef _ASM_X86_RWSEM_H -#define _ASM_X86_RWSEM_H - -#ifndef _LINUX_RWSEM_H -#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" -#endif - -#ifdef __KERNEL__ -#include <asm/asm.h> - -/* - * The bias values and the counter type limits the number of - * potential readers/writers to 32767 for 32 bits and 2147483647 - * for 64 bits. 
- */ - -#ifdef CONFIG_X86_64 -# define RWSEM_ACTIVE_MASK 0xffffffffL -#else -# define RWSEM_ACTIVE_MASK 0x0000ffffL -#endif - -#define RWSEM_UNLOCKED_VALUE 0x00000000L -#define RWSEM_ACTIVE_BIAS 0x00000001L -#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) -#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS -#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - -/* - * lock for reading - */ -#define ____down_read(sem, slow_path) \ -({ \ - struct rw_semaphore* ret; \ - asm volatile("# beginning down_read\n\t" \ - LOCK_PREFIX _ASM_INC "(%[sem])\n\t" \ - /* adds 0x00000001 */ \ - " jns 1f\n" \ - " call " slow_path "\n" \ - "1:\n\t" \ - "# ending down_read\n\t" \ - : "+m" (sem->count), "=a" (ret), \ - ASM_CALL_CONSTRAINT \ - : [sem] "a" (sem) \ - : "memory", "cc"); \ - ret; \ -}) - -static inline void __down_read(struct rw_semaphore *sem) -{ - ____down_read(sem, "call_rwsem_down_read_failed"); -} - -static inline int __down_read_killable(struct rw_semaphore *sem) -{ - if (IS_ERR(____down_read(sem, "call_rwsem_down_read_failed_killable"))) - return -EINTR; - return 0; -} - -/* - * trylock for reading -- returns 1 if successful, 0 if contention - */ -static inline bool __down_read_trylock(struct rw_semaphore *sem) -{ - long result, tmp; - asm volatile("# beginning __down_read_trylock\n\t" - " mov %[count],%[result]\n\t" - "1:\n\t" - " mov %[result],%[tmp]\n\t" - " add %[inc],%[tmp]\n\t" - " jle 2f\n\t" - LOCK_PREFIX " cmpxchg %[tmp],%[count]\n\t" - " jnz 1b\n\t" - "2:\n\t" - "# ending __down_read_trylock\n\t" - : [count] "+m" (sem->count), [result] "=&a" (result), - [tmp] "=&r" (tmp) - : [inc] "i" (RWSEM_ACTIVE_READ_BIAS) - : "memory", "cc"); - return result >= 0; -} - -/* - * lock for writing - */ -#define ____down_write(sem, slow_path) \ -({ \ - long tmp; \ - struct rw_semaphore* ret; \ - \ - asm volatile("# beginning down_write\n\t" \ - LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" \ - /* adds 0xffff0001, returns the old value */ \ - " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \ - /* was the active mask 0 before? */\ - " jz 1f\n" \ - " call " slow_path "\n" \ - "1:\n" \ - "# ending down_write" \ - : "+m" (sem->count), [tmp] "=d" (tmp), \ - "=a" (ret), ASM_CALL_CONSTRAINT \ - : [sem] "a" (sem), "[tmp]" (RWSEM_ACTIVE_WRITE_BIAS) \ - : "memory", "cc"); \ - ret; \ -}) - -static inline void __down_write(struct rw_semaphore *sem) -{ - ____down_write(sem, "call_rwsem_down_write_failed"); -} - -static inline int __down_write_killable(struct rw_semaphore *sem) -{ - if (IS_ERR(____down_write(sem, "call_rwsem_down_write_failed_killable"))) - return -EINTR; - - return 0; -} - -/* - * trylock for writing -- returns 1 if successful, 0 if contention - */ -static inline bool __down_write_trylock(struct rw_semaphore *sem) -{ - bool result; - long tmp0, tmp1; - asm volatile("# beginning __down_write_trylock\n\t" - " mov %[count],%[tmp0]\n\t" - "1:\n\t" - " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" - /* was the active mask 0 before? 
*/ - " jnz 2f\n\t" - " mov %[tmp0],%[tmp1]\n\t" - " add %[inc],%[tmp1]\n\t" - LOCK_PREFIX " cmpxchg %[tmp1],%[count]\n\t" - " jnz 1b\n\t" - "2:\n\t" - CC_SET(e) - "# ending __down_write_trylock\n\t" - : [count] "+m" (sem->count), [tmp0] "=&a" (tmp0), - [tmp1] "=&r" (tmp1), CC_OUT(e) (result) - : [inc] "er" (RWSEM_ACTIVE_WRITE_BIAS) - : "memory"); - return result; -} - -/* - * unlock after reading - */ -static inline void __up_read(struct rw_semaphore *sem) -{ - long tmp; - asm volatile("# beginning __up_read\n\t" - LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" - /* subtracts 1, returns the old value */ - " jns 1f\n\t" - " call call_rwsem_wake\n" /* expects old value in %edx */ - "1:\n" - "# ending __up_read\n" - : "+m" (sem->count), [tmp] "=d" (tmp) - : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_READ_BIAS) - : "memory", "cc"); -} - -/* - * unlock after writing - */ -static inline void __up_write(struct rw_semaphore *sem) -{ - long tmp; - asm volatile("# beginning __up_write\n\t" - LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" - /* subtracts 0xffff0001, returns the old value */ - " jns 1f\n\t" - " call call_rwsem_wake\n" /* expects old value in %edx */ - "1:\n\t" - "# ending __up_write\n" - : "+m" (sem->count), [tmp] "=d" (tmp) - : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_WRITE_BIAS) - : "memory", "cc"); -} - -/* - * downgrade write lock to read lock - */ -static inline void __downgrade_write(struct rw_semaphore *sem) -{ - asm volatile("# beginning __downgrade_write\n\t" - LOCK_PREFIX _ASM_ADD "%[inc],(%[sem])\n\t" - /* - * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386) - * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64) - */ - " jns 1f\n\t" - " call call_rwsem_downgrade_wake\n" - "1:\n\t" - "# ending __downgrade_write\n" - : "+m" (sem->count) - : [sem] "a" (sem), [inc] "er" (-RWSEM_WAITING_BIAS) - : "memory", "cc"); -} - -#endif /* __KERNEL__ */ -#endif /* _ASM_X86_RWSEM_H */ diff --git a/arch/x86/include/asm/seccomp.h b/arch/x86/include/asm/seccomp.h index 2bd1338de236..42bcd42d70d1 100644 --- a/arch/x86/include/asm/seccomp.h +++ b/arch/x86/include/asm/seccomp.h @@ -9,13 +9,33 @@ #endif #ifdef CONFIG_COMPAT -#include <asm/ia32_unistd.h> +#include <asm/unistd_32_ia32.h> #define __NR_seccomp_read_32 __NR_ia32_read #define __NR_seccomp_write_32 __NR_ia32_write #define __NR_seccomp_exit_32 __NR_ia32_exit #define __NR_seccomp_sigreturn_32 __NR_ia32_sigreturn #endif +#ifdef CONFIG_X86_64 +# define SECCOMP_ARCH_NATIVE AUDIT_ARCH_X86_64 +# define SECCOMP_ARCH_NATIVE_NR NR_syscalls +# define SECCOMP_ARCH_NATIVE_NAME "x86_64" +# ifdef CONFIG_COMPAT +# define SECCOMP_ARCH_COMPAT AUDIT_ARCH_I386 +# define SECCOMP_ARCH_COMPAT_NR IA32_NR_syscalls +# define SECCOMP_ARCH_COMPAT_NAME "ia32" +# endif +/* + * x32 will have __X32_SYSCALL_BIT set in syscall number. We don't support + * caching them and they are treated as out of range syscalls, which will + * always pass through the BPF filter. 
+ */ +#else /* !CONFIG_X86_64 */ +# define SECCOMP_ARCH_NATIVE AUDIT_ARCH_I386 +# define SECCOMP_ARCH_NATIVE_NR NR_syscalls +# define SECCOMP_ARCH_NATIVE_NAME "ia32" +#endif + #include <asm-generic/seccomp.h> #endif /* _ASM_X86_SECCOMP_H */ diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 8ea1cfdbeabc..30e8ee7006f9 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -5,12 +5,16 @@ #include <asm-generic/sections.h> #include <asm/extable.h> +extern char __relocate_kernel_start[], __relocate_kernel_end[]; extern char __brk_base[], __brk_limit[]; -extern struct exception_table_entry __stop___ex_table[]; extern char __end_rodata_aligned[]; #if defined(CONFIG_X86_64) extern char __end_rodata_hpage_align[]; #endif +extern char __end_of_kernel_reserve[]; + +extern unsigned long _brk_start, _brk_end; + #endif /* _ASM_X86_SECTIONS_H */ diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index ac3892920419..f59ae7186940 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -4,6 +4,7 @@ #include <linux/const.h> #include <asm/alternative.h> +#include <asm/ibt.h> /* * Constructor for a conventional segment GDT (or LDT) entry. @@ -31,6 +32,18 @@ */ #define SEGMENT_RPL_MASK 0x3 +/* + * When running on Xen PV, the actual privilege level of the kernel is 1, + * not 0. Testing the Requested Privilege Level in a segment selector to + * determine whether the context is user mode or kernel mode with + * SEGMENT_RPL_MASK is wrong because the PV kernel's privilege level + * matches the 0x3 mask. + * + * Testing with USER_SEGMENT_RPL_MASK is valid for both native and Xen PV + * kernels because privilege level 2 is never used. + */ +#define USER_SEGMENT_RPL_MASK 0x2 + /* User mode is privilege level 3: */ #define USER_RPL 0x3 @@ -43,7 +56,7 @@ #define GDT_ENTRY_INVALID_SEG 0 -#ifdef CONFIG_X86_32 +#if defined(CONFIG_X86_32) && !defined(BUILD_VDSO32_64) /* * The layout of the per-CPU GDT under Linux: * @@ -83,7 +96,7 @@ * * 26 - ESPFIX small SS * 27 - per-cpu [ offset to per-cpu data area ] - * 28 - stack_canary-20 [ for stack protector ] <=== cacheline #8 + * 28 - VDSO getcpu * 29 - unused * 30 - unused * 31 - TSS for double fault handler @@ -106,7 +119,7 @@ #define GDT_ENTRY_ESPFIX_SS 26 #define GDT_ENTRY_PERCPU 27 -#define GDT_ENTRY_STACK_CANARY 28 +#define GDT_ENTRY_CPUNODE 28 #define GDT_ENTRY_DOUBLEFAULT_TSS 31 @@ -123,6 +136,7 @@ #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8) #define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3) #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3) +#define __USER32_CS __USER_CS #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8) /* segment for calling fn: */ @@ -146,11 +160,7 @@ # define __KERNEL_PERCPU 0 #endif -#ifdef CONFIG_STACKPROTECTOR -# define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8) -#else -# define __KERNEL_STACK_CANARY 0 -#endif +#define __CPUNODE_SEG (GDT_ENTRY_CPUNODE*8 + 3) #else /* 64-bit: */ @@ -204,33 +214,26 @@ #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8) #define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8 + 3) #define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3) -#define __USER32_DS __USER_DS #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3) #define __CPUNODE_SEG (GDT_ENTRY_CPUNODE*8 + 3) #endif -#ifndef CONFIG_PARAVIRT_XXL -# define get_kernel_rpl() 0 -#endif - #define IDT_ENTRIES 256 #define NUM_EXCEPTION_VECTORS 32 /* Bitmask of exception vectors which push an error code on the stack: */ -#define EXCEPTION_ERRCODE_MASK 0x00027d00 +#define 
EXCEPTION_ERRCODE_MASK 0x20027d00 #define GDT_SIZE (GDT_ENTRIES*8) #define GDT_ENTRY_TLS_ENTRIES 3 #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8) -#ifdef CONFIG_X86_64 - /* Bit size and mask of CPU number stored in the per CPU data (and TSC_AUX) */ #define VDSO_CPUNODE_BITS 12 #define VDSO_CPUNODE_MASK 0xfff -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* Helper functions to store/load CPU and node numbers */ @@ -241,7 +244,7 @@ static inline unsigned long vdso_encode_cpunode(int cpu, unsigned long node) static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) { - unsigned int p; + unsigned long p; /* * Load CPU and node number from the GDT. LSL is faster than RDTSCP @@ -251,10 +254,10 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) * * If RDPID is available, use it. */ - alternative_io ("lsl %[seg],%[p]", - ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */ + alternative_io ("lsl %[seg],%k[p]", + "rdpid %[p]", X86_FEATURE_RDPID, - [p] "=a" (p), [seg] "r" (__CPUNODE_SEG)); + [p] "=r" (p), [seg] "r" (__CPUNODE_SEG)); if (cpu) *cpu = (p & VDSO_CPUNODE_MASK); @@ -262,8 +265,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) *node = (p >> VDSO_CPUNODE_BITS); } -#endif /* !__ASSEMBLY__ */ -#endif /* CONFIG_X86_64 */ +#endif /* !__ASSEMBLER__ */ #ifdef __KERNEL__ @@ -274,7 +276,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) * vector has no error code (two bytes), a 'push $vector_number' (two * bytes), and a jump to the common entry code (up to five bytes). */ -#define EARLY_IDT_HANDLER_SIZE 9 +#define EARLY_IDT_HANDLER_SIZE (9 + ENDBR_INSN_SIZE) /* * xen_early_idt_handler_array is for Xen pv guests: for each entry in @@ -282,14 +284,14 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) * pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to * max 8 bytes. 
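[Editorial note] The value that LSL (or RDPID) returns in vdso_read_cpunode() above is the segment limit of __CPUNODE_SEG, which packs the CPU number into the low 12 bits and the node number above them. A stand-alone check of that packing; encode_cpunode() here mirrors what the kernel's vdso_encode_cpunode() is expected to do:

        #include <assert.h>

        #define VDSO_CPUNODE_BITS       12
        #define VDSO_CPUNODE_MASK       0xfff

        static unsigned long encode_cpunode(unsigned int cpu, unsigned long node)
        {
                return (node << VDSO_CPUNODE_BITS) | cpu;
        }

        int main(void)
        {
                unsigned long p = encode_cpunode(5, 3);

                assert((p & VDSO_CPUNODE_MASK) == 5);   /* cpu */
                assert((p >> VDSO_CPUNODE_BITS) == 3);  /* node */
                return 0;
        }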
*/ -#define XEN_EARLY_IDT_HANDLER_SIZE 8 +#define XEN_EARLY_IDT_HANDLER_SIZE (8 + ENDBR_INSN_SIZE) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; extern void early_ignore_irq(void); -#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) +#ifdef CONFIG_XEN_PV extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE]; #endif @@ -306,14 +308,7 @@ do { \ \ asm volatile(" \n" \ "1: movl %k0,%%" #seg " \n" \ - \ - ".section .fixup,\"ax\" \n" \ - "2: xorl %k0,%k0 \n" \ - " jmp 1b \n" \ - ".previous \n" \ - \ - _ASM_EXTABLE(1b, 2b) \ - \ + _ASM_EXTABLE_TYPE_REG(1b, 1b, EX_TYPE_ZERO_REG, %k0)\ : "+r" (__val) : : "memory"); \ } while (0) @@ -338,7 +333,7 @@ static inline void __loadsegment_fs(unsigned short value) "1: movw %0, %%fs \n" "2: \n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_clear_fs) + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_CLEAR_FS) : : "rm" (value) : "memory"); } @@ -355,26 +350,7 @@ static inline void __loadsegment_fs(unsigned short value) #define savesegment(seg, value) \ asm("mov %%" #seg ",%0":"=r" (value) : : "memory") -/* - * x86-32 user GS accessors: - */ -#ifdef CONFIG_X86_32 -# ifdef CONFIG_X86_32_LAZY_GS -# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; }) -# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) -# define task_user_gs(tsk) ((tsk)->thread.gs) -# define lazy_save_gs(v) savesegment(gs, (v)) -# define lazy_load_gs(v) loadsegment(gs, (v)) -# else /* X86_32_LAZY_GS */ -# define get_user_gs(regs) (u16)((regs)->gs) -# define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) -# define task_user_gs(tsk) (task_pt_regs(tsk)->gs) -# define lazy_save_gs(v) do { } while (0) -# define lazy_load_gs(v) do { } while (0) -# endif /* X86_32_LAZY_GS */ -#endif /* X86_32 */ - -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __KERNEL__ */ #endif /* _ASM_X86_SEGMENT_H */ diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 07a25753e85c..61f56cdaccb5 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -4,12 +4,16 @@ #include <asm/page.h> #include <asm-generic/set_memory.h> +#include <asm/pgtable.h> + +#define set_memory_rox set_memory_rox +int set_memory_rox(unsigned long addr, int numpages); /* * The set_memory_* API can be used to change various attributes of a virtual * address range. The attributes include: - * Cachability : UnCached, WriteCombining, WriteThrough, WriteBack - * Executability : eXeutable, NoteXecutable + * Cacheability : UnCached, WriteCombining, WriteThrough, WriteBack + * Executability : eXecutable, NoteXecutable * Read/Write : ReadOnly, ReadWrite * Presence : NotPresent * Encryption : Encrypted, Decrypted @@ -34,28 +38,28 @@ * The caller is required to take care of these. 
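[Editorial note] A hypothetical caller of the savesegment()/loadsegment() accessors reworked above, to show the intended pairing; with the new EX_TYPE_ZERO_REG extable entry, a faulting selector load is fixed up to load a zero selector rather than oopsing. swap_out_fs() is an invented name for this sketch:

        static unsigned short swap_out_fs(void)
        {
                unsigned short old;

                savesegment(fs, old);
                loadsegment(fs, 0);
                return old;
        }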
*/ +int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot); int _set_memory_uc(unsigned long addr, int numpages); int _set_memory_wc(unsigned long addr, int numpages); int _set_memory_wt(unsigned long addr, int numpages); int _set_memory_wb(unsigned long addr, int numpages); int set_memory_uc(unsigned long addr, int numpages); int set_memory_wc(unsigned long addr, int numpages); -int set_memory_wt(unsigned long addr, int numpages); int set_memory_wb(unsigned long addr, int numpages); int set_memory_np(unsigned long addr, int numpages); +int set_memory_p(unsigned long addr, int numpages); int set_memory_4k(unsigned long addr, int numpages); + +bool set_memory_enc_stop_conversion(void); int set_memory_encrypted(unsigned long addr, int numpages); int set_memory_decrypted(unsigned long addr, int numpages); -int set_memory_np_noalias(unsigned long addr, int numpages); -int set_memory_array_uc(unsigned long *addr, int addrinarray); -int set_memory_array_wc(unsigned long *addr, int addrinarray); -int set_memory_array_wt(unsigned long *addr, int addrinarray); -int set_memory_array_wb(unsigned long *addr, int addrinarray); +int set_memory_np_noalias(unsigned long addr, int numpages); +int set_memory_nonglobal(unsigned long addr, int numpages); +int set_memory_global(unsigned long addr, int numpages); int set_pages_array_uc(struct page **pages, int addrinarray); int set_pages_array_wc(struct page **pages, int addrinarray); -int set_pages_array_wt(struct page **pages, int addrinarray); int set_pages_array_wb(struct page **pages, int addrinarray); /* @@ -80,55 +84,14 @@ int set_pages_array_wb(struct page **pages, int addrinarray); int set_pages_uc(struct page *page, int numpages); int set_pages_wb(struct page *page, int numpages); -int set_pages_x(struct page *page, int numpages); -int set_pages_nx(struct page *page, int numpages); int set_pages_ro(struct page *page, int numpages); int set_pages_rw(struct page *page, int numpages); -extern int kernel_set_to_readonly; -void set_kernel_text_rw(void); -void set_kernel_text_ro(void); - -#ifdef CONFIG_X86_64 -static inline int set_mce_nospec(unsigned long pfn) -{ - unsigned long decoy_addr; - int rc; +int set_direct_map_invalid_noflush(struct page *page); +int set_direct_map_default_noflush(struct page *page); +int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid); +bool kernel_page_present(struct page *page); - /* - * Mark the linear address as UC to make sure we don't log more - * errors because of speculative access to the page. - * We would like to just call: - * set_memory_uc((unsigned long)pfn_to_kaddr(pfn), 1); - * but doing that would radically increase the odds of a - * speculative access to the poison page because we'd have - * the virtual address of the kernel 1:1 mapping sitting - * around in registers. - * Instead we get tricky. We create a non-canonical address - * that looks just like the one we want, but has bit 63 flipped. - * This relies on set_memory_uc() properly sanitizing any __pa() - * results with __PHYSICAL_MASK or PTE_PFN_MASK. - */ - decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); - - rc = set_memory_uc(decoy_addr, 1); - if (rc) - pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); - return rc; -} -#define set_mce_nospec set_mce_nospec - -/* Restore full speculative operation to the pfn. 
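[Editorial note] The decoy-address trick in the removed set_mce_nospec() is worth spelling out: the non-canonical alias keeps the same page-offset bits as the 1:1 mapping, so set_memory_uc()'s physical-mask sanitizing still reaches the right PTE, while the canonical poison address never sits in a register. A stand-alone model of the arithmetic; the PAGE_OFFSET value is just an example for 4-level paging:

        #include <stdint.h>

        #define PAGE_SHIFT      12
        #define PAGE_OFFSET     0xffff888000000000UL    /* example value */

        static uint64_t mce_decoy_addr(uint64_t pfn)
        {
                /* Same bits as the kernel's 1:1 address of pfn, but with
                 * bit 63 flipped; __pa()-style masking strips the flip. */
                return (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ (1UL << 63));
        }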
*/ -static inline int clear_mce_nospec(unsigned long pfn) -{ - return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1); -} -#define clear_mce_nospec clear_mce_nospec -#else -/* - * Few people would run a 32-bit kernel on a machine that supports - * recoverable errors because they have too much memory to boot 32-bit. - */ -#endif +extern int kernel_set_to_readonly; #endif /* _ASM_X86_SET_MEMORY_H */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ed8ec011a9fd..914eb32581c7 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -8,6 +8,7 @@ #include <linux/linkage.h> #include <asm/page_types.h> +#include <asm/ibt.h> #ifdef __i386__ @@ -26,12 +27,12 @@ #define OLD_CL_ADDRESS 0x020 /* Relative to real mode data */ #define NEW_CL_POINTER 0x228 /* Relative to real mode data */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ +#include <linux/cache.h> + #include <asm/bootparam.h> #include <asm/x86_init.h> -extern u64 relocated_ramdisk; - /* Interrupt control for vSMPowered x86_64 systems */ #ifdef CONFIG_X86_64 void vsmp_init(void); @@ -39,16 +40,22 @@ void vsmp_init(void); static inline void vsmp_init(void) { } #endif +struct pt_regs; + void setup_bios_corruption_check(void); void early_platform_quirks(void); extern unsigned long saved_video_mode; +extern unsigned long acpi_realmode_flags; extern void reserve_standard_io_resources(void); extern void i386_reserve_resources(void); -extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp); -extern unsigned long __startup_secondary_64(void); -extern int early_make_pgtable(unsigned long address); +extern unsigned long __startup_64(unsigned long p2v_offset, struct boot_params *bp); +extern void startup_64_setup_gdt_idt(void); +extern void startup_64_load_idt(void *vc_handler); +extern void __pi_startup_64_load_idt(void *vc_handler); +extern void early_setup_idt(void); +extern void __init do_early_exception(struct pt_regs *regs, int trapnr); #ifdef CONFIG_X86_INTEL_MID extern void x86_intel_mid_early_setup(void); @@ -62,6 +69,8 @@ extern void x86_ce4100_early_setup(void); static inline void x86_ce4100_early_setup(void) { } #endif +#include <linux/kexec_handover.h> + #ifndef _SETUP #include <asm/espfix.h> @@ -75,7 +84,17 @@ extern char _text[]; static inline bool kaslr_enabled(void) { - return !!(boot_params.hdr.loadflags & KASLR_FLAG); + return IS_ENABLED(CONFIG_RANDOMIZE_MEMORY) && + !!(boot_params.hdr.loadflags & KASLR_FLAG); +} + +/* + * Apply no randomization if KASLR was disabled at boot or if KASAN + * is enabled. KASAN shadow mappings rely on regions being PGD aligned. + */ +static inline bool kaslr_memory_enabled(void) +{ + return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN); } static inline unsigned long kaslr_offset(void) @@ -94,50 +113,51 @@ extern unsigned long _brk_end; void *extend_brk(size_t size, size_t align); /* - * Reserve space in the brk section. The name must be unique within - * the file, and somewhat descriptive. The size is in bytes. Must be - * used at file scope. + * Reserve space in the .brk section, which is a block of memory from which the + * caller is allowed to allocate very early (before even memblock is available) + * by calling extend_brk(). All allocated memory will be eventually converted + * to memblock. Any leftover unallocated memory will be freed. * - * (This uses a temp function to wrap the asm so we can pass it the - * size parameter; otherwise we wouldn't be able to. 
We can't use a - * "section" attribute on a normal variable because it always ends up - * being @progbits, which ends up allocating space in the vmlinux - * executable.) + * The size is in bytes. */ -#define RESERVE_BRK(name,sz) \ - static void __section(.discard.text) __used notrace \ - __brk_reservation_fn_##name##__(void) { \ - asm volatile ( \ - ".pushsection .brk_reservation,\"aw\",@nobits;" \ - ".brk." #name ":" \ - " 1:.skip %c0;" \ - " .size .brk." #name ", . - 1b;" \ - " .popsection" \ - : : "i" (sz)); \ - } - -/* Helper for reserving space for arrays of things */ -#define RESERVE_BRK_ARRAY(type, name, entries) \ - type *name; \ - RESERVE_BRK(name, sizeof(type) * entries) +#define RESERVE_BRK(name, size) \ + __section(".bss..brk") __aligned(1) __used \ + static char __brk_##name[size] extern void probe_roms(void); + +void clear_bss(void); + #ifdef __i386__ -asmlinkage void __init i386_start_kernel(void); +asmlinkage void __init __noreturn i386_start_kernel(void); +void __init mk_early_pgtbl_32(void); #else -asmlinkage void __init x86_64_start_kernel(char *real_mode); -asmlinkage void __init x86_64_start_reservations(char *real_mode_data); +asmlinkage void __init __noreturn x86_64_start_kernel(char *real_mode); +asmlinkage void __init __noreturn x86_64_start_reservations(char *real_mode_data); #endif /* __i386__ */ #endif /* _SETUP */ + +#ifdef CONFIG_CMDLINE_BOOL +extern bool builtin_cmdline_added __ro_after_init; #else -#define RESERVE_BRK(name,sz) \ - .pushsection .brk_reservation,"aw",@nobits; \ -.brk.name: \ -1: .skip sz; \ - .size .brk.name,.-1b; \ +#define builtin_cmdline_added 0 +#endif + +#else /* __ASSEMBLER__ */ + +.macro __RESERVE_BRK name, size + .pushsection .bss..brk, "aw" +SYM_DATA_START(__brk_\name) + .skip \size +SYM_DATA_END(__brk_\name) .popsection -#endif /* __ASSEMBLY__ */ +.endm + +#define RESERVE_BRK(name, size) __RESERVE_BRK name, size + +#endif /* __ASSEMBLER__ */ + #endif /* _ASM_X86_SETUP_H */ diff --git a/arch/x86/include/asm/setup_data.h b/arch/x86/include/asm/setup_data.h new file mode 100644 index 000000000000..7bb16f843c93 --- /dev/null +++ b/arch/x86/include/asm/setup_data.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SETUP_DATA_H +#define _ASM_X86_SETUP_DATA_H + +#include <uapi/asm/setup_data.h> + +#ifndef __ASSEMBLER__ + +struct pci_setup_rom { + struct setup_data data; + uint16_t vendor; + uint16_t devid; + uint64_t pcilen; + unsigned long segment; + unsigned long bus; + unsigned long device; + unsigned long function; + uint8_t romdata[]; +}; + +/* kexec external ABI */ +struct efi_setup_data { + u64 fw_vendor; + u64 __unused; + u64 tables; + u64 smbios; + u64 reserved[8]; +}; + +#endif /* __ASSEMBLER__ */ + +#endif /* _ASM_X86_SETUP_DATA_H */ diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h new file mode 100644 index 000000000000..01a6e4dbe423 --- /dev/null +++ b/arch/x86/include/asm/sev-common.h @@ -0,0 +1,261 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD SEV header common between the guest and the hypervisor. 
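[Editorial note] A hypothetical user of the reworked brk reservation described above, to show the intended flow; the reservation name and sizes here are made up:

        /* Grow the .bss..brk block by four pages at link time ... */
        RESERVE_BRK(early_tables, 4 * PAGE_SIZE);

        /* ... and carve pages out of it before memblock is available. */
        static void *early_table_alloc(void)
        {
                return extend_brk(PAGE_SIZE, PAGE_SIZE);
        }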
+ * + * Author: Brijesh Singh <brijesh.singh@amd.com> + */ + +#ifndef __ASM_X86_SEV_COMMON_H +#define __ASM_X86_SEV_COMMON_H + +#define GHCB_MSR_INFO_POS 0 +#define GHCB_DATA_LOW 12 +#define GHCB_MSR_INFO_MASK (BIT_ULL(GHCB_DATA_LOW) - 1) + +#define GHCB_DATA(v) \ + (((unsigned long)(v) & ~GHCB_MSR_INFO_MASK) >> GHCB_DATA_LOW) + +/* SEV Information Request/Response */ +#define GHCB_MSR_SEV_INFO_RESP 0x001 +#define GHCB_MSR_SEV_INFO_REQ 0x002 + +#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \ + /* GHCBData[63:48] */ \ + ((((_max) & 0xffff) << 48) | \ + /* GHCBData[47:32] */ \ + (((_min) & 0xffff) << 32) | \ + /* GHCBData[31:24] */ \ + (((_cbit) & 0xff) << 24) | \ + GHCB_MSR_SEV_INFO_RESP) + +#define GHCB_MSR_INFO(v) ((v) & 0xfffUL) +#define GHCB_MSR_PROTO_MAX(v) (((v) >> 48) & 0xffff) +#define GHCB_MSR_PROTO_MIN(v) (((v) >> 32) & 0xffff) + +/* CPUID Request/Response */ +#define GHCB_MSR_CPUID_REQ 0x004 +#define GHCB_MSR_CPUID_RESP 0x005 +#define GHCB_MSR_CPUID_FUNC_POS 32 +#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff +#define GHCB_MSR_CPUID_VALUE_POS 32 +#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff +#define GHCB_MSR_CPUID_REG_POS 30 +#define GHCB_MSR_CPUID_REG_MASK 0x3 +#define GHCB_CPUID_REQ_EAX 0 +#define GHCB_CPUID_REQ_EBX 1 +#define GHCB_CPUID_REQ_ECX 2 +#define GHCB_CPUID_REQ_EDX 3 +#define GHCB_CPUID_REQ(fn, reg) \ + /* GHCBData[11:0] */ \ + (GHCB_MSR_CPUID_REQ | \ + /* GHCBData[31:12] */ \ + (((unsigned long)(reg) & 0x3) << 30) | \ + /* GHCBData[63:32] */ \ + (((unsigned long)fn) << 32)) + +/* AP Reset Hold */ +#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006 +#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007 +#define GHCB_MSR_AP_RESET_HOLD_RESULT_POS 12 +#define GHCB_MSR_AP_RESET_HOLD_RESULT_MASK GENMASK_ULL(51, 0) + +/* Preferred GHCB GPA Request */ +#define GHCB_MSR_PREF_GPA_REQ 0x010 +#define GHCB_MSR_GPA_VALUE_POS 12 +#define GHCB_MSR_GPA_VALUE_MASK GENMASK_ULL(51, 0) + +#define GHCB_MSR_PREF_GPA_RESP 0x011 +#define GHCB_MSR_PREF_GPA_NONE 0xfffffffffffff + +/* GHCB GPA Register */ +#define GHCB_MSR_REG_GPA_REQ 0x012 +#define GHCB_MSR_REG_GPA_REQ_VAL(v) \ + /* GHCBData[63:12] */ \ + (((u64)((v) & GENMASK_ULL(51, 0)) << 12) | \ + /* GHCBData[11:0] */ \ + GHCB_MSR_REG_GPA_REQ) + +#define GHCB_MSR_REG_GPA_RESP 0x013 +#define GHCB_MSR_REG_GPA_RESP_VAL(v) \ + /* GHCBData[63:12] */ \ + (((u64)(v) & GENMASK_ULL(63, 12)) >> 12) + +/* + * SNP Page State Change Operation + * + * GHCBData[55:52] - Page operation: + * 0x0001 Page assignment, Private + * 0x0002 Page assignment, Shared + */ +enum psc_op { + SNP_PAGE_STATE_PRIVATE = 1, + SNP_PAGE_STATE_SHARED, +}; + +#define GHCB_MSR_PSC_REQ 0x014 +#define GHCB_MSR_PSC_REQ_GFN(gfn, op) \ + /* GHCBData[55:52] */ \ + (((u64)((op) & 0xf) << 52) | \ + /* GHCBData[51:12] */ \ + ((u64)((gfn) & GENMASK_ULL(39, 0)) << 12) | \ + /* GHCBData[11:0] */ \ + GHCB_MSR_PSC_REQ) + +#define GHCB_MSR_PSC_REQ_TO_GFN(msr) (((msr) & GENMASK_ULL(51, 12)) >> 12) +#define GHCB_MSR_PSC_REQ_TO_OP(msr) (((msr) & GENMASK_ULL(55, 52)) >> 52) + +#define GHCB_MSR_PSC_RESP 0x015 +#define GHCB_MSR_PSC_RESP_VAL(val) \ + /* GHCBData[63:32] */ \ + (((u64)(val) & GENMASK_ULL(63, 32)) >> 32) + +/* Set highest bit as a generic error response */ +#define GHCB_MSR_PSC_RESP_ERROR (BIT_ULL(63) | GHCB_MSR_PSC_RESP) + +/* GHCB Run at VMPL Request/Response */ +#define GHCB_MSR_VMPL_REQ 0x016 +#define GHCB_MSR_VMPL_REQ_LEVEL(v) \ + /* GHCBData[39:32] */ \ + ((((u64)(v) & GENMASK_ULL(7, 0)) << 32) | \ + /* GHCBDdata[11:0] */ \ + GHCB_MSR_VMPL_REQ) + +#define GHCB_MSR_VMPL_RESP 0x017 +#define 
GHCB_MSR_VMPL_RESP_VAL(v) \ + /* GHCBData[63:32] */ \ + (((u64)(v) & GENMASK_ULL(63, 32)) >> 32) + +/* GHCB Hypervisor Feature Request/Response */ +#define GHCB_MSR_HV_FT_REQ 0x080 +#define GHCB_MSR_HV_FT_RESP 0x081 +#define GHCB_MSR_HV_FT_POS 12 +#define GHCB_MSR_HV_FT_MASK GENMASK_ULL(51, 0) +#define GHCB_MSR_HV_FT_RESP_VAL(v) \ + /* GHCBData[63:12] */ \ + (((u64)(v) & GENMASK_ULL(63, 12)) >> 12) + +#define GHCB_HV_FT_SNP BIT_ULL(0) +#define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1) +#define GHCB_HV_FT_SNP_MULTI_VMPL BIT_ULL(5) + +/* + * SNP Page State Change NAE event + * The VMGEXIT_PSC_MAX_ENTRY determines the size of the PSC structure, which + * is a local stack variable in set_pages_state(). Do not increase this value + * without evaluating the impact to stack usage. + * + * Use VMGEXIT_PSC_MAX_COUNT in cases where the actual GHCB-defined max value + * is needed, such as when processing GHCB requests on the hypervisor side. + */ +#define VMGEXIT_PSC_MAX_ENTRY 64 +#define VMGEXIT_PSC_MAX_COUNT 253 + +#define VMGEXIT_PSC_ERROR_GENERIC (0x100UL << 32) +#define VMGEXIT_PSC_ERROR_INVALID_HDR ((1UL << 32) | 1) +#define VMGEXIT_PSC_ERROR_INVALID_ENTRY ((1UL << 32) | 2) + +#define VMGEXIT_PSC_OP_PRIVATE 1 +#define VMGEXIT_PSC_OP_SHARED 2 + +struct psc_hdr { + u16 cur_entry; + u16 end_entry; + u32 reserved; +} __packed; + +struct psc_entry { + u64 cur_page : 12, + gfn : 40, + operation : 4, + pagesize : 1, + reserved : 7; +} __packed; + +struct snp_psc_desc { + struct psc_hdr hdr; + struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY]; +} __packed; + +#define GHCB_MSR_TERM_REQ 0x100 +#define GHCB_MSR_TERM_REASON_SET_POS 12 +#define GHCB_MSR_TERM_REASON_SET_MASK 0xf +#define GHCB_MSR_TERM_REASON_POS 16 +#define GHCB_MSR_TERM_REASON_MASK 0xff + +#define GHCB_SEV_TERM_REASON(reason_set, reason_val) \ + /* GHCBData[15:12] */ \ + (((((u64)reason_set) & 0xf) << 12) | \ + /* GHCBData[23:16] */ \ + ((((u64)reason_val) & 0xff) << 16)) + +/* Error codes from reason set 0 */ +#define SEV_TERM_SET_GEN 0 +#define GHCB_SEV_ES_GEN_REQ 0 +#define GHCB_SEV_ES_PROT_UNSUPPORTED 1 +#define GHCB_SNP_UNSUPPORTED 2 + +/* Linux-specific reason codes (used with reason set 1) */ +#define SEV_TERM_SET_LINUX 1 +#define GHCB_TERM_REGISTER 0 /* GHCB GPA registration failure */ +#define GHCB_TERM_PSC 1 /* Page State Change failure */ +#define GHCB_TERM_PVALIDATE 2 /* Pvalidate failure */ +#define GHCB_TERM_NOT_VMPL0 3 /* SNP guest is not running at VMPL-0 */ +#define GHCB_TERM_CPUID 4 /* CPUID-validation failure */ +#define GHCB_TERM_CPUID_HV 5 /* CPUID failure during hypervisor fallback */ +#define GHCB_TERM_SECRETS_PAGE 6 /* Secrets page failure */ +#define GHCB_TERM_NO_SVSM 7 /* SVSM is not advertised in the secrets page */ +#define GHCB_TERM_SVSM_VMPL0 8 /* SVSM is present but has set VMPL to 0 */ +#define GHCB_TERM_SVSM_CAA 9 /* SVSM is present but CAA is not page aligned */ +#define GHCB_TERM_SECURE_TSC 10 /* Secure TSC initialization failed */ +#define GHCB_TERM_SVSM_CA_REMAP_FAIL 11 /* SVSM is present but CA could not be remapped */ +#define GHCB_TERM_SAVIC_FAIL 12 /* Secure AVIC-specific failure */ + +#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) + +/* + * GHCB-defined return codes that are communicated back to the guest via + * SW_EXITINFO1. + */ +#define GHCB_HV_RESP_NO_ACTION 0 +#define GHCB_HV_RESP_ISSUE_EXCEPTION 1 +#define GHCB_HV_RESP_MALFORMED_INPUT 2 + +/* + * GHCB-defined sub-error codes for malformed input (see above) that are + * communicated back to the guest via SW_EXITINFO2[31:0]. 
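[Editorial note] Putting the PSC MSR-protocol macros above together, asking the hypervisor to convert one 4K frame to private boils down to a single encoded value. A sketch that assumes a 12-bit page shift and elides the response check; psc_make_private() is an invented name:

        static u64 psc_make_private(u64 paddr)
        {
                /* GHCBData[55:52] = op, [51:12] = gfn, [11:0] = 0x014 */
                return GHCB_MSR_PSC_REQ_GFN(paddr >> 12, SNP_PAGE_STATE_PRIVATE);
        }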
+ */ +#define GHCB_ERR_NOT_REGISTERED 1 +#define GHCB_ERR_INVALID_USAGE 2 +#define GHCB_ERR_INVALID_SCRATCH_AREA 3 +#define GHCB_ERR_MISSING_INPUT 4 +#define GHCB_ERR_INVALID_INPUT 5 +#define GHCB_ERR_INVALID_EVENT 6 + +struct sev_config { + __u64 debug : 1, + + /* + * Indicates when the per-CPU GHCB has been created and registered + * and thus can be used by the BSP instead of the early boot GHCB. + * + * For APs, the per-CPU GHCB is created before they are started + * and registered upon startup, so this flag can be used globally + * for the BSP and APs. + */ + ghcbs_initialized : 1, + + /* + * Indicates when the per-CPU SVSM CA is to be used instead of the + * boot SVSM CA. + * + * For APs, the per-CPU SVSM CA is created as part of the AP + * bringup, so this flag can be used globally for the BSP and APs. + */ + use_cas : 1, + + __reserved : 61; +}; + +extern struct sev_config sev_cfg; + +#endif diff --git a/arch/x86/include/asm/sev-internal.h b/arch/x86/include/asm/sev-internal.h new file mode 100644 index 000000000000..c58c47c68ab6 --- /dev/null +++ b/arch/x86/include/asm/sev-internal.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define DR7_RESET_VALUE 0x400 + +extern u64 sev_hv_features; +extern u64 sev_secrets_pa; + +/* #VC handler runtime per-CPU data */ +struct sev_es_runtime_data { + struct ghcb ghcb_page; + + /* + * Reserve one page per CPU as backup storage for the unencrypted GHCB. + * It is needed when an NMI happens while the #VC handler uses the real + * GHCB, and the NMI handler itself is causing another #VC exception. In + * that case the GHCB content of the first handler needs to be backed up + * and restored. + */ + struct ghcb backup_ghcb; + + /* + * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions. + * There is no need for it to be atomic, because nothing is written to + * the GHCB between the read and the write of ghcb_active. So it is safe + * to use it when a nested #VC exception happens before the write. + * + * This is necessary for example in the #VC->NMI->#VC case when the NMI + * happens while the first #VC handler uses the GHCB. When the NMI code + * raises a second #VC handler it might overwrite the contents of the + * GHCB written by the first handler. To avoid this the content of the + * GHCB is saved and restored when the GHCB is detected to be in use + * already. + */ + bool ghcb_active; + bool backup_ghcb_active; + + /* + * Cached DR7 value - write it on DR7 writes and return it on reads. + * That value will never make it to the real hardware DR7 as debugging + * is currently unsupported in SEV-ES guests. 
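[Editorial note] The ghcb_active/backup_ghcb_active pair above drives a simple hand-out decision. A simplified sketch of that logic; the real __sev_get_ghcb() additionally backs up and restores the page contents, and pick_ghcb() is an invented name:

        static struct ghcb *pick_ghcb(struct sev_es_runtime_data *data)
        {
                if (!data->ghcb_active) {
                        data->ghcb_active = true;
                        return &data->ghcb_page;
                }

                /* Nested #VC (e.g. #VC -> NMI -> #VC): use the backup page. */
                data->backup_ghcb_active = true;
                return &data->backup_ghcb;
        }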
+ */ + unsigned long dr7; +}; + +struct ghcb_state { + struct ghcb *ghcb; +}; + +extern struct svsm_ca boot_svsm_ca_page; + +struct ghcb *__sev_get_ghcb(struct ghcb_state *state); +void __sev_put_ghcb(struct ghcb_state *state); + +DECLARE_PER_CPU(struct sev_es_runtime_data*, runtime_data); +DECLARE_PER_CPU(struct sev_es_save_area *, sev_vmsa); + +void early_set_pages_state(unsigned long vaddr, unsigned long paddr, + unsigned long npages, const struct psc_desc *desc); + +DECLARE_PER_CPU(struct svsm_ca *, svsm_caa); +DECLARE_PER_CPU(u64, svsm_caa_pa); + +extern u64 boot_svsm_caa_pa; + +enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt); +void vc_forward_exception(struct es_em_ctxt *ctxt); + +static inline u64 sev_es_rd_ghcb_msr(void) +{ + return native_rdmsrq(MSR_AMD64_SEV_ES_GHCB); +} + +static __always_inline void sev_es_wr_ghcb_msr(u64 val) +{ + u32 low, high; + + low = (u32)(val); + high = (u32)(val >> 32); + + native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high); +} + +enum es_result sev_es_ghcb_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write); + +u64 get_hv_features(void); + +const struct snp_cpuid_table *snp_cpuid_get_table(void); diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h new file mode 100644 index 000000000000..0e6c0940100f --- /dev/null +++ b/arch/x86/include/asm/sev.h @@ -0,0 +1,682 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD Encrypted Register State Support + * + * Author: Joerg Roedel <jroedel@suse.de> + */ + +#ifndef __ASM_ENCRYPTED_STATE_H +#define __ASM_ENCRYPTED_STATE_H + +#include <linux/types.h> +#include <linux/sev-guest.h> + +#include <asm/insn.h> +#include <asm/sev-common.h> +#include <asm/coco.h> +#include <asm/set_memory.h> +#include <asm/svm.h> + +#define GHCB_PROTOCOL_MIN 1ULL +#define GHCB_PROTOCOL_MAX 2ULL +#define GHCB_DEFAULT_USAGE 0ULL + +#define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); } + +struct boot_params; + +enum es_result { + ES_OK, /* All good */ + ES_UNSUPPORTED, /* Requested operation not supported */ + ES_VMM_ERROR, /* Unexpected state from the VMM */ + ES_DECODE_FAILED, /* Instruction decoding failed */ + ES_EXCEPTION, /* Instruction caused exception */ + ES_RETRY, /* Retry instruction emulation */ +}; + +struct es_fault_info { + unsigned long vector; + unsigned long error_code; + unsigned long cr2; +}; + +struct pt_regs; + +/* ES instruction emulation context */ +struct es_em_ctxt { + struct pt_regs *regs; + struct insn insn; + struct es_fault_info fi; +}; + +/* + * AMD SEV Confidential computing blob structure. The structure is + * defined in OVMF UEFI firmware header: + * https://github.com/tianocore/edk2/blob/master/OvmfPkg/Include/Guid/ConfidentialComputingSevSnpBlob.h + */ +#define CC_BLOB_SEV_HDR_MAGIC 0x45444d41 +struct cc_blob_sev_info { + u32 magic; + u16 version; + u16 reserved; + u64 secrets_phys; + u32 secrets_len; + u32 rsvd1; + u64 cpuid_phys; + u32 cpuid_len; + u32 rsvd2; +} __packed; + +void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code); + +static inline u64 lower_bits(u64 val, unsigned int bits) +{ + u64 mask = (1ULL << bits) - 1; + + return (val & mask); +} + +struct real_mode_header; +enum stack_type; + +/* Early IDT entry points for #VC handler */ +extern void vc_no_ghcb(void); +extern void vc_boot_ghcb(void); +extern bool handle_vc_boot_ghcb(struct pt_regs *regs); + +/* + * Individual entries of the SNP CPUID table, as defined by the SNP + * Firmware ABI, Revision 0.9, Section 7.1, Table 14. 
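[Editorial note] Early in boot, before a full GHCB page exists, CPUID can be fetched one register at a time over the GHCB MSR protocol defined in sev-common.h above. A hedged sketch of one such read using the declared helpers; real code terminates the guest on a bad response instead of returning 0, and ghcb_msr_cpuid_eax() is an invented name:

        static u32 ghcb_msr_cpuid_eax(u32 fn)
        {
                u64 val;

                sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX));
                VMGEXIT();
                val = sev_es_rd_ghcb_msr();

                if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
                        return 0;       /* real code terminates here */

                return (u32)(val >> GHCB_MSR_CPUID_VALUE_POS);
        }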
+ */ +struct snp_cpuid_fn { + u32 eax_in; + u32 ecx_in; + u64 xcr0_in; + u64 xss_in; + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; + u64 __reserved; +} __packed; + +/* + * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9, + * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit + * of 64 entries per CPUID table. + */ +#define SNP_CPUID_COUNT_MAX 64 + +struct snp_cpuid_table { + u32 count; + u32 __reserved1; + u64 __reserved2; + struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX]; +} __packed; + +/* PVALIDATE return codes */ +#define PVALIDATE_FAIL_SIZEMISMATCH 6 + +/* Software defined (when rFlags.CF = 1) */ +#define PVALIDATE_FAIL_NOUPDATE 255 + +/* RMUPDATE detected 4K page and 2MB page overlap. */ +#define RMPUPDATE_FAIL_OVERLAP 4 + +/* PSMASH failed due to concurrent access by another CPU */ +#define PSMASH_FAIL_INUSE 3 + +/* RMP page size */ +#define RMP_PG_SIZE_4K 0 +#define RMP_PG_SIZE_2M 1 +#define RMP_TO_PG_LEVEL(level) (((level) == RMP_PG_SIZE_4K) ? PG_LEVEL_4K : PG_LEVEL_2M) +#define PG_LEVEL_TO_RMP(level) (((level) == PG_LEVEL_4K) ? RMP_PG_SIZE_4K : RMP_PG_SIZE_2M) + +struct rmp_state { + u64 gpa; + u8 assigned; + u8 pagesize; + u8 immutable; + u8 rsvd; + u32 asid; +} __packed; + +#define RMPADJUST_VMSA_PAGE_BIT BIT(16) + +/* SNP Guest message request */ +struct snp_req_data { + unsigned long req_gpa; + unsigned long resp_gpa; + unsigned long data_gpa; + unsigned int data_npages; +}; + +#define MAX_AUTHTAG_LEN 32 +#define AUTHTAG_LEN 16 +#define AAD_LEN 48 +#define MSG_HDR_VER 1 + +#define SNP_REQ_MAX_RETRY_DURATION (60*HZ) +#define SNP_REQ_RETRY_DELAY (2*HZ) + +/* See SNP spec SNP_GUEST_REQUEST section for the structure */ +enum msg_type { + SNP_MSG_TYPE_INVALID = 0, + SNP_MSG_CPUID_REQ, + SNP_MSG_CPUID_RSP, + SNP_MSG_KEY_REQ, + SNP_MSG_KEY_RSP, + SNP_MSG_REPORT_REQ, + SNP_MSG_REPORT_RSP, + SNP_MSG_EXPORT_REQ, + SNP_MSG_EXPORT_RSP, + SNP_MSG_IMPORT_REQ, + SNP_MSG_IMPORT_RSP, + SNP_MSG_ABSORB_REQ, + SNP_MSG_ABSORB_RSP, + SNP_MSG_VMRK_REQ, + SNP_MSG_VMRK_RSP, + + SNP_MSG_TSC_INFO_REQ = 17, + SNP_MSG_TSC_INFO_RSP, + + SNP_MSG_TYPE_MAX +}; + +enum aead_algo { + SNP_AEAD_INVALID, + SNP_AEAD_AES_256_GCM, +}; + +struct snp_guest_msg_hdr { + u8 authtag[MAX_AUTHTAG_LEN]; + u64 msg_seqno; + u8 rsvd1[8]; + u8 algo; + u8 hdr_version; + u16 hdr_sz; + u8 msg_type; + u8 msg_version; + u16 msg_sz; + u32 rsvd2; + u8 msg_vmpck; + u8 rsvd3[35]; +} __packed; + +struct snp_guest_msg { + struct snp_guest_msg_hdr hdr; + u8 payload[PAGE_SIZE - sizeof(struct snp_guest_msg_hdr)]; +} __packed; + +#define SNP_TSC_INFO_REQ_SZ 128 + +struct snp_tsc_info_req { + u8 rsvd[SNP_TSC_INFO_REQ_SZ]; +} __packed; + +struct snp_tsc_info_resp { + u32 status; + u32 rsvd1; + u64 tsc_scale; + u64 tsc_offset; + u32 tsc_factor; + u8 rsvd2[100]; +} __packed; + +/* + * Obtain the mean TSC frequency by decreasing the nominal TSC frequency with + * TSC_FACTOR as documented in the SNP Firmware ABI specification: + * + * GUEST_TSC_FREQ * (1 - (TSC_FACTOR * 0.00001)) + * + * which is equivalent to: + * + * GUEST_TSC_FREQ -= (GUEST_TSC_FREQ * TSC_FACTOR) / 100000; + */ +#define SNP_SCALE_TSC_FREQ(freq, factor) ((freq) - (freq) * (factor) / 100000) + +struct snp_guest_req { + void *req_buf; + size_t req_sz; + + void *resp_buf; + size_t resp_sz; + + u64 exit_code; + u64 exitinfo2; + unsigned int vmpck_id; + u8 msg_version; + u8 msg_type; + + struct snp_req_data input; + void *certs_data; +}; + +/* + * The secrets page contains 96-bytes of reserved field that can be used by + * the guest OS. 
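[Editorial note] A quick numeric check of the SNP_SCALE_TSC_FREQ() macro above: a nominal frequency of 2,000,000 kHz with a TSC_FACTOR of 5 (that is, 0.005%) scales down by exactly 100 kHz:

        #include <assert.h>

        #define SNP_SCALE_TSC_FREQ(freq, factor) \
                ((freq) - (freq) * (factor) / 100000)

        int main(void)
        {
                assert(SNP_SCALE_TSC_FREQ(2000000ULL, 5) == 1999900ULL);
                return 0;
        }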
The guest OS uses the area to save the message sequence + * number for each VMPCK. + * + * See the GHCB spec section Secret page layout for the format for this area. + */ +struct secrets_os_area { + u32 msg_seqno_0; + u32 msg_seqno_1; + u32 msg_seqno_2; + u32 msg_seqno_3; + u64 ap_jump_table_pa; + u8 rsvd[40]; + u8 guest_usage[32]; +} __packed; + +#define VMPCK_KEY_LEN 32 + +/* See the SNP spec version 0.9 for secrets page format */ +struct snp_secrets_page { + u32 version; + u32 imien : 1, + rsvd1 : 31; + u32 fms; + u32 rsvd2; + u8 gosvw[16]; + u8 vmpck0[VMPCK_KEY_LEN]; + u8 vmpck1[VMPCK_KEY_LEN]; + u8 vmpck2[VMPCK_KEY_LEN]; + u8 vmpck3[VMPCK_KEY_LEN]; + struct secrets_os_area os_area; + + u8 vmsa_tweak_bitmap[64]; + + /* SVSM fields */ + u64 svsm_base; + u64 svsm_size; + u64 svsm_caa; + u32 svsm_max_version; + u8 svsm_guest_vmpl; + u8 rsvd3[3]; + + /* The percentage decrease from nominal to mean TSC frequency. */ + u32 tsc_factor; + + /* Remainder of page */ + u8 rsvd4[3740]; +} __packed; + +struct snp_msg_desc { + /* request and response are in unencrypted memory */ + struct snp_guest_msg *request, *response; + + /* + * Avoid information leakage by double-buffering shared messages + * in fields that are in regular encrypted memory. + */ + struct snp_guest_msg secret_request, secret_response; + + struct snp_secrets_page *secrets; + + struct aesgcm_ctx *ctx; + + u32 *os_area_msg_seqno; + u8 *vmpck; + int vmpck_id; +}; + +/* + * The SVSM Calling Area (CA) related structures. + */ +struct svsm_ca { + u8 call_pending; + u8 mem_available; + u8 rsvd1[6]; + + u8 svsm_buffer[PAGE_SIZE - 8]; +}; + +#define SVSM_SUCCESS 0 +#define SVSM_ERR_INCOMPLETE 0x80000000 +#define SVSM_ERR_UNSUPPORTED_PROTOCOL 0x80000001 +#define SVSM_ERR_UNSUPPORTED_CALL 0x80000002 +#define SVSM_ERR_INVALID_ADDRESS 0x80000003 +#define SVSM_ERR_INVALID_FORMAT 0x80000004 +#define SVSM_ERR_INVALID_PARAMETER 0x80000005 +#define SVSM_ERR_INVALID_REQUEST 0x80000006 +#define SVSM_ERR_BUSY 0x80000007 +#define SVSM_PVALIDATE_FAIL_SIZEMISMATCH 0x80001006 + +/* + * The SVSM PVALIDATE related structures + */ +struct svsm_pvalidate_entry { + u64 page_size : 2, + action : 1, + ignore_cf : 1, + rsvd : 8, + pfn : 52; +}; + +struct svsm_pvalidate_call { + u16 num_entries; + u16 cur_index; + + u8 rsvd1[4]; + + struct svsm_pvalidate_entry entry[]; +}; + +#define SVSM_PVALIDATE_MAX_COUNT ((sizeof_field(struct svsm_ca, svsm_buffer) - \ + offsetof(struct svsm_pvalidate_call, entry)) / \ + sizeof(struct svsm_pvalidate_entry)) + +/* + * The SVSM Attestation related structures + */ +struct svsm_loc_entry { + u64 pa; + u32 len; + u8 rsvd[4]; +}; + +struct svsm_attest_call { + struct svsm_loc_entry report_buf; + struct svsm_loc_entry nonce; + struct svsm_loc_entry manifest_buf; + struct svsm_loc_entry certificates_buf; + + /* For attesting a single service */ + u8 service_guid[16]; + u32 service_manifest_ver; + u8 rsvd[4]; +}; + +/* PTE descriptor used for the prepare_pte_enc() operations. 
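[Editorial note] The SVSM_PVALIDATE_MAX_COUNT sizing above is easy to sanity-check for 4K pages: the calling-area buffer is 4096 - 8 bytes, the call header occupies another 8, and each svsm_pvalidate_entry packs into one u64 (2 + 1 + 1 + 8 + 52 = 64 bits). A stand-alone restatement of that arithmetic:

        enum {
                SVSM_BUF   = 4096 - 8,  /* sizeof(svsm_buffer) */
                CALL_HDR   = 8,         /* offsetof(svsm_pvalidate_call, entry) */
                ENTRY_SIZE = 8,         /* packed u64 bitfield */
        };

        _Static_assert((SVSM_BUF - CALL_HDR) / ENTRY_SIZE == 510,
                       "510 PVALIDATE entries fit in one calling area");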
*/ +struct pte_enc_desc { + pte_t *kpte; + int pte_level; + bool encrypt; + /* pfn of the kpte above */ + unsigned long pfn; + /* physical address of @pfn */ + unsigned long pa; + /* virtual address of @pfn */ + void *va; + /* memory covered by the pte */ + unsigned long size; + pgprot_t new_pgprot; +}; + +/* + * SVSM protocol structure + */ +struct svsm_call { + struct svsm_ca *caa; + u64 rax; + u64 rcx; + u64 rdx; + u64 r8; + u64 r9; + u64 rax_out; + u64 rcx_out; + u64 rdx_out; + u64 r8_out; + u64 r9_out; +}; + +#define SVSM_CORE_CALL(x) ((0ULL << 32) | (x)) +#define SVSM_CORE_REMAP_CA 0 +#define SVSM_CORE_PVALIDATE 1 +#define SVSM_CORE_CREATE_VCPU 2 +#define SVSM_CORE_DELETE_VCPU 3 + +#define SVSM_ATTEST_CALL(x) ((1ULL << 32) | (x)) +#define SVSM_ATTEST_SERVICES 0 +#define SVSM_ATTEST_SINGLE_SERVICE 1 + +#define SVSM_VTPM_CALL(x) ((2ULL << 32) | (x)) +#define SVSM_VTPM_QUERY 0 +#define SVSM_VTPM_CMD 1 + +#ifdef CONFIG_AMD_MEM_ENCRYPT + +extern u8 snp_vmpl; + +extern void __sev_es_ist_enter(struct pt_regs *regs); +extern void __sev_es_ist_exit(void); +static __always_inline void sev_es_ist_enter(struct pt_regs *regs) +{ + if (cc_vendor == CC_VENDOR_AMD && + cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) + __sev_es_ist_enter(regs); +} +static __always_inline void sev_es_ist_exit(void) +{ + if (cc_vendor == CC_VENDOR_AMD && + cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) + __sev_es_ist_exit(); +} +extern int sev_es_setup_ap_jump_table(struct real_mode_header *rmh); +extern void __sev_es_nmi_complete(void); +static __always_inline void sev_es_nmi_complete(void) +{ + if (cc_vendor == CC_VENDOR_AMD && + cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) + __sev_es_nmi_complete(); +} +extern int __init sev_es_efi_map_ghcbs_cas(pgd_t *pgd); +extern void sev_enable(struct boot_params *bp); + +/* + * RMPADJUST modifies the RMP permissions of a page of a lesser- + * privileged (numerically higher) VMPL. + * + * If the guest is running at a higher-privilege than the privilege + * level the instruction is targeting, the instruction will succeed, + * otherwise, it will fail. 
+ */ +static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) +{ + int rc; + + /* "rmpadjust" mnemonic support in binutils 2.36 and newer */ + asm volatile(".byte 0xF3,0x0F,0x01,0xFE\n\t" + : "=a"(rc) + : "a"(vaddr), "c"(rmp_psize), "d"(attrs) + : "memory", "cc"); + + return rc; +} +static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) +{ + bool no_rmpupdate; + int rc; + + /* "pvalidate" mnemonic support in binutils 2.36 and newer */ + asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFF\n\t" + : "=@ccc"(no_rmpupdate), "=a"(rc) + : "a"(vaddr), "c"(rmp_psize), "d"(validate) + : "memory", "cc"); + + if (no_rmpupdate) + return PVALIDATE_FAIL_NOUPDATE; + + return rc; +} + +void setup_ghcb(void); +void snp_register_ghcb_early(unsigned long paddr); +void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, + unsigned long npages); +void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, + unsigned long npages); +void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); +void snp_set_memory_private(unsigned long vaddr, unsigned long npages); +void snp_set_wakeup_secondary_cpu(void); +bool snp_init(struct boot_params *bp); +void snp_dmi_setup(void); +int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input); +void snp_accept_memory(phys_addr_t start, phys_addr_t end); +u64 snp_get_unsupported_features(u64 status); +u64 sev_get_status(void); +void sev_show_status(void); +int prepare_pte_enc(struct pte_enc_desc *d); +void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot); +void snp_kexec_finish(void); +void snp_kexec_begin(void); + +int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id); +struct snp_msg_desc *snp_msg_alloc(void); +void snp_msg_free(struct snp_msg_desc *mdesc); +int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req); + +int snp_svsm_vtpm_send_command(u8 *buffer); + +void __init snp_secure_tsc_prepare(void); +void __init snp_secure_tsc_init(void); +enum es_result savic_register_gpa(u64 gpa); +enum es_result savic_unregister_gpa(u64 *gpa); +u64 savic_ghcb_msr_read(u32 reg); +void savic_ghcb_msr_write(u32 reg, u64 value); + +static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb) +{ + ghcb->save.sw_exit_code = 0; + __builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); +} + +/* I/O parameters for CPUID-related helpers */ +struct cpuid_leaf { + u32 fn; + u32 subfn; + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; +}; + +int svsm_perform_msr_protocol(struct svsm_call *call); +int __pi_svsm_perform_msr_protocol(struct svsm_call *call); +int snp_cpuid(void (*cpuid_fn)(void *ctx, struct cpuid_leaf *leaf), + void *ctx, struct cpuid_leaf *leaf); + +void svsm_issue_call(struct svsm_call *call, u8 *pending); +int svsm_process_result_codes(struct svsm_call *call); + +void __noreturn sev_es_terminate(unsigned int set, unsigned int reason); +enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, + struct es_em_ctxt *ctxt, + u64 exit_code, u64 exit_info_1, + u64 exit_info_2); + +bool sev_es_negotiate_protocol(void); +bool sev_es_check_cpu_features(void); + +extern u16 ghcb_version; +extern struct ghcb *boot_ghcb; +extern bool sev_snp_needs_sfw; + +struct psc_desc { + enum psc_op op; + struct svsm_ca *ca; + u64 caa_pa; +}; + +static inline void sev_evict_cache(void *va, int npages) +{ + volatile u8 val __always_unused; + u8 *bytes = va; + int page_idx; + + /* + * For SEV guests, a read from 
the first/last cache-lines of a 4K page + * using the guest key is sufficient to cause a flush of all cache-lines + * associated with that 4K page without incurring all the overhead of a + * full CLFLUSH sequence. + */ + for (page_idx = 0; page_idx < npages; page_idx++) { + val = bytes[page_idx * PAGE_SIZE]; + val = bytes[page_idx * PAGE_SIZE + PAGE_SIZE - 1]; + } +} + +#else /* !CONFIG_AMD_MEM_ENCRYPT */ + +#define snp_vmpl 0 +static inline void sev_es_ist_enter(struct pt_regs *regs) { } +static inline void sev_es_ist_exit(void) { } +static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; } +static inline void sev_es_nmi_complete(void) { } +static inline int sev_es_efi_map_ghcbs_cas(pgd_t *pgd) { return 0; } +static inline void sev_enable(struct boot_params *bp) { } +static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; } +static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; } +static inline void setup_ghcb(void) { } +static inline void __init +early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } +static inline void __init +early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } +static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { } +static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { } +static inline void snp_set_wakeup_secondary_cpu(void) { } +static inline bool snp_init(struct boot_params *bp) { return false; } +static inline void snp_dmi_setup(void) { } +static inline int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input) +{ + return -ENOTTY; +} +static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } +static inline u64 snp_get_unsupported_features(u64 status) { return 0; } +static inline u64 sev_get_status(void) { return 0; } +static inline void sev_show_status(void) { } +static inline int prepare_pte_enc(struct pte_enc_desc *d) { return 0; } +static inline void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot) { } +static inline void snp_kexec_finish(void) { } +static inline void snp_kexec_begin(void) { } +static inline int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id) { return -1; } +static inline struct snp_msg_desc *snp_msg_alloc(void) { return NULL; } +static inline void snp_msg_free(struct snp_msg_desc *mdesc) { } +static inline int snp_send_guest_request(struct snp_msg_desc *mdesc, + struct snp_guest_req *req) { return -ENODEV; } +static inline int snp_svsm_vtpm_send_command(u8 *buffer) { return -ENODEV; } +static inline void __init snp_secure_tsc_prepare(void) { } +static inline void __init snp_secure_tsc_init(void) { } +static inline void sev_evict_cache(void *va, int npages) {} +static inline enum es_result savic_register_gpa(u64 gpa) { return ES_UNSUPPORTED; } +static inline enum es_result savic_unregister_gpa(u64 *gpa) { return ES_UNSUPPORTED; } +static inline void savic_ghcb_msr_write(u32 reg, u64 value) { } +static inline u64 savic_ghcb_msr_read(u32 reg) { return 0; } + +#endif /* CONFIG_AMD_MEM_ENCRYPT */ + +#ifdef CONFIG_KVM_AMD_SEV +bool snp_probe_rmptable_info(void); +int snp_rmptable_init(void); +int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level); +void snp_dump_hva_rmpentry(unsigned long address); +int psmash(u64 pfn); +int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool 
immutable); +int rmp_make_shared(u64 pfn, enum pg_level level); +void __snp_leak_pages(u64 pfn, unsigned int npages, bool dump_rmp); +void kdump_sev_callback(void); +void snp_fixup_e820_tables(void); +static inline void snp_leak_pages(u64 pfn, unsigned int pages) +{ + __snp_leak_pages(pfn, pages, true); +} +#else +static inline bool snp_probe_rmptable_info(void) { return false; } +static inline int snp_rmptable_init(void) { return -ENOSYS; } +static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; } +static inline void snp_dump_hva_rmpentry(unsigned long address) {} +static inline int psmash(u64 pfn) { return -ENODEV; } +static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, + bool immutable) +{ + return -ENODEV; +} +static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; } +static inline void __snp_leak_pages(u64 pfn, unsigned int npages, bool dump_rmp) {} +static inline void snp_leak_pages(u64 pfn, unsigned int npages) {} +static inline void kdump_sev_callback(void) { } +static inline void snp_fixup_e820_tables(void) {} +#endif + +#endif diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h new file mode 100644 index 000000000000..04958459a7ca --- /dev/null +++ b/arch/x86/include/asm/sgx.h @@ -0,0 +1,430 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(c) 2016-20 Intel Corporation. + * + * Intel Software Guard Extensions (SGX) support. + */ +#ifndef _ASM_X86_SGX_H +#define _ASM_X86_SGX_H + +#include <linux/bits.h> +#include <linux/types.h> + +/* + * This file contains both data structures defined by SGX architecture and Linux + * defined software data structures and functions. The two should not be mixed + * together for better readability. The architectural definitions come first. + */ + +/* The SGX specific CPUID function. */ +#define SGX_CPUID 0x12 +/* EPC enumeration. */ +#define SGX_CPUID_EPC 2 +/* An invalid EPC section, i.e. the end marker. */ +#define SGX_CPUID_EPC_INVALID 0x0 +/* A valid EPC section. */ +#define SGX_CPUID_EPC_SECTION 0x1 +/* The bitmask for the EPC section type. */ +#define SGX_CPUID_EPC_MASK GENMASK(3, 0) + +enum sgx_encls_function { + ECREATE = 0x00, + EADD = 0x01, + EINIT = 0x02, + EREMOVE = 0x03, + EDGBRD = 0x04, + EDGBWR = 0x05, + EEXTEND = 0x06, + ELDU = 0x08, + EBLOCK = 0x09, + EPA = 0x0A, + EWB = 0x0B, + ETRACK = 0x0C, + EAUG = 0x0D, + EMODPR = 0x0E, + EMODT = 0x0F, + EUPDATESVN = 0x18, +}; + +/** + * SGX_ENCLS_FAULT_FLAG - flag signifying an ENCLS return code is a trapnr + * + * ENCLS has its own (positive value) error codes and also generates + * ENCLS specific #GP and #PF faults. And the ENCLS values get munged + * with system error codes as everything percolates back up the stack. + * Unfortunately (for us), we need to precisely identify each unique + * error code, e.g. the action taken if EWB fails varies based on the + * type of fault and on the exact SGX error code, i.e. we can't simply + * convert all faults to -EFAULT. + * + * To make all three error types coexist, we set bit 30 to identify an + * ENCLS fault. Bit 31 (technically bits N:31) is used to differentiate + * between positive (faults and SGX error codes) and negative (system + * error codes) values. + */ +#define SGX_ENCLS_FAULT_FLAG 0x40000000 + +/** + * enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV + * @SGX_EPC_PAGE_CONFLICT: Page is being written by other ENCLS function. 
+ * @SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not + * been completed yet. + * @SGX_CHILD_PRESENT: SECS has child pages present in the EPC. + * @SGX_INVALID_EINITTOKEN: EINITTOKEN is invalid and enclave signer's + * public key does not match IA32_SGXLEPUBKEYHASH. + * @SGX_PAGE_NOT_MODIFIABLE: The EPC page cannot be modified because it + * is in the PENDING or MODIFIED state. + * @SGX_INSUFFICIENT_ENTROPY: Insufficient entropy in RNG. + * @SGX_NO_UPDATE: EUPDATESVN could not update the CPUSVN because the + * current SVN was not newer than CPUSVN. This is the most + * common error code returned by EUPDATESVN. + * @SGX_UNMASKED_EVENT: An unmasked event, e.g. INTR, was received + */ +enum sgx_return_code { + SGX_EPC_PAGE_CONFLICT = 7, + SGX_NOT_TRACKED = 11, + SGX_CHILD_PRESENT = 13, + SGX_INVALID_EINITTOKEN = 16, + SGX_PAGE_NOT_MODIFIABLE = 20, + SGX_INSUFFICIENT_ENTROPY = 29, + SGX_NO_UPDATE = 31, + SGX_UNMASKED_EVENT = 128, +}; + +/* The modulus size for 3072-bit RSA keys. */ +#define SGX_MODULUS_SIZE 384 + +/** + * enum sgx_miscselect - additional information to an SSA frame + * @SGX_MISC_EXINFO: Report #PF or #GP to the SSA frame. + * + * Save State Area (SSA) is a stack inside the enclave used to store processor + * state when an exception or interrupt occurs. This enum defines additional + * information stored to an SSA frame. + */ +enum sgx_miscselect { + SGX_MISC_EXINFO = BIT(0), +}; + +#define SGX_MISC_RESERVED_MASK GENMASK_ULL(63, 1) + +#define SGX_SSA_GPRS_SIZE 184 +#define SGX_SSA_MISC_EXINFO_SIZE 16 + +/** + * enum sgx_attribute - the attributes field in &struct sgx_secs + * @SGX_ATTR_INIT: Enclave can be entered (is initialized). + * @SGX_ATTR_DEBUG: Allow ENCLS(EDBGRD) and ENCLS(EDBGWR). + * @SGX_ATTR_MODE64BIT: Tell that this a 64-bit enclave. + * @SGX_ATTR_PROVISIONKEY: Allow to use provisioning keys for remote + * attestation. + * @SGX_ATTR_KSS: Allow to use key separation and sharing (KSS). + * @SGX_ATTR_EINITTOKENKEY: Allow to use token signing key that is used to + * sign cryptographic tokens that can be passed to + * EINIT as an authorization to run an enclave. + * @SGX_ATTR_ASYNC_EXIT_NOTIFY: Allow enclaves to be notified after an + * asynchronous exit has occurred. 
+ */
+enum sgx_attribute {
+	SGX_ATTR_INIT		   = BIT(0),
+	SGX_ATTR_DEBUG		   = BIT(1),
+	SGX_ATTR_MODE64BIT	   = BIT(2),
+				     /* BIT(3) is reserved */
+	SGX_ATTR_PROVISIONKEY	   = BIT(4),
+	SGX_ATTR_EINITTOKENKEY	   = BIT(5),
+				     /* BIT(6) is for CET */
+	SGX_ATTR_KSS		   = BIT(7),
+				     /* BIT(8) is reserved */
+				     /* BIT(9) is reserved */
+	SGX_ATTR_ASYNC_EXIT_NOTIFY = BIT(10),
+};
+
+#define SGX_ATTR_RESERVED_MASK	(BIT_ULL(3) | \
+				 BIT_ULL(6) | \
+				 BIT_ULL(8) | \
+				 BIT_ULL(9) | \
+				 GENMASK_ULL(63, 11))
+
+#define SGX_ATTR_UNPRIV_MASK	(SGX_ATTR_DEBUG | \
+				 SGX_ATTR_MODE64BIT | \
+				 SGX_ATTR_KSS | \
+				 SGX_ATTR_ASYNC_EXIT_NOTIFY)
+
+#define SGX_ATTR_PRIV_MASK	(SGX_ATTR_PROVISIONKEY | \
+				 SGX_ATTR_EINITTOKENKEY)
+
+/**
+ * struct sgx_secs - SGX Enclave Control Structure (SECS)
+ * @size:		size of the address space
+ * @base:		base address of the address space
+ * @ssa_frame_size:	size of an SSA frame
+ * @miscselect:		additional information stored to an SSA frame
+ * @attributes:		attributes for enclave
+ * @xfrm:		XSave-Feature Request Mask (subset of XCR0)
+ * @mrenclave:		SHA256-hash of the enclave contents
+ * @mrsigner:		SHA256-hash of the public key used to sign the SIGSTRUCT
+ * @config_id:		a user-defined value that is used in key derivation
+ * @isv_prod_id:	a user-defined value that is used in key derivation
+ * @isv_svn:		a user-defined value that is used in key derivation
+ * @config_svn:		a user-defined value that is used in key derivation
+ *
+ * SGX Enclave Control Structure (SECS) is a special enclave page that is not
+ * visible in the address space. In fact, this structure defines the address
+ * range and other global attributes for the enclave and it is the first EPC
+ * page created for any enclave. It is moved from a temporary buffer to the
+ * EPC by means of the ENCLS[ECREATE] function.
+ */
+struct sgx_secs {
+	u64 size;
+	u64 base;
+	u32 ssa_frame_size;
+	u32 miscselect;
+	u8  reserved1[24];
+	u64 attributes;
+	u64 xfrm;
+	u32 mrenclave[8];
+	u8  reserved2[32];
+	u32 mrsigner[8];
+	u8  reserved3[32];
+	u32 config_id[16];
+	u16 isv_prod_id;
+	u16 isv_svn;
+	u16 config_svn;
+	u8  reserved4[3834];
+} __packed;
+
+/**
+ * enum sgx_tcs_flags - execution flags for TCS
+ * @SGX_TCS_DBGOPTIN:	If enabled allows single-stepping and breakpoints
+ *			inside an enclave. It is cleared by EADD but can
+ *			be set later with EDBGWR.
+ */
+enum sgx_tcs_flags {
+	SGX_TCS_DBGOPTIN	= 0x01,
+};
+
+#define SGX_TCS_RESERVED_MASK	GENMASK_ULL(63, 1)
+#define SGX_TCS_RESERVED_SIZE	4024
+
+/**
+ * struct sgx_tcs - Thread Control Structure (TCS)
+ * @state:		used to mark an entered TCS
+ * @flags:		execution flags (cleared by EADD)
+ * @ssa_offset:		SSA stack offset relative to the enclave base
+ * @ssa_index:		the current SSA frame index (cleared by EADD)
+ * @nr_ssa_frames:	the number of frames in the SSA stack
+ * @entry_offset:	entry point offset relative to the enclave base
+ * @exit_addr:		address outside the enclave to exit on an exception or
+ *			interrupt
+ * @fs_offset:		offset relative to the enclave base to become FS
+ *			segment inside the enclave
+ * @gs_offset:		offset relative to the enclave base to become GS
+ *			segment inside the enclave
+ * @fs_limit:		size to become a new FS-limit (only 32-bit enclaves)
+ * @gs_limit:		size to become a new GS-limit (only 32-bit enclaves)
+ *
+ * Thread Control Structure (TCS) is an enclave page visible in its address
+ * space that defines an entry point inside the enclave. A thread enters an
+ * enclave by supplying the address of a TCS to ENCLU(EENTER).
+ * A TCS can be entered by only one thread at a time.
+ */
+struct sgx_tcs {
+	u64 state;
+	u64 flags;
+	u64 ssa_offset;
+	u32 ssa_index;
+	u32 nr_ssa_frames;
+	u64 entry_offset;
+	u64 exit_addr;
+	u64 fs_offset;
+	u64 gs_offset;
+	u32 fs_limit;
+	u32 gs_limit;
+	u8  reserved[SGX_TCS_RESERVED_SIZE];
+} __packed;
+
+/**
+ * struct sgx_pageinfo - an enclave page descriptor
+ * @addr:	address of the enclave page
+ * @contents:	pointer to the page contents
+ * @metadata:	pointer either to a SECINFO or PCMD instance
+ * @secs:	address of the SECS page
+ */
+struct sgx_pageinfo {
+	u64 addr;
+	u64 contents;
+	u64 metadata;
+	u64 secs;
+} __packed __aligned(32);
+
+/**
+ * enum sgx_page_type - bits in the SECINFO flags defining the page type
+ * @SGX_PAGE_TYPE_SECS:	a SECS page
+ * @SGX_PAGE_TYPE_TCS:	a TCS page
+ * @SGX_PAGE_TYPE_REG:	a regular page
+ * @SGX_PAGE_TYPE_VA:	a VA page
+ * @SGX_PAGE_TYPE_TRIM:	a page in trimmed state
+ *
+ * Make sure when making changes to this enum that its values can still fit
+ * in the bitfield within &struct sgx_encl_page.
+ */
+enum sgx_page_type {
+	SGX_PAGE_TYPE_SECS,
+	SGX_PAGE_TYPE_TCS,
+	SGX_PAGE_TYPE_REG,
+	SGX_PAGE_TYPE_VA,
+	SGX_PAGE_TYPE_TRIM,
+};
+
+#define SGX_NR_PAGE_TYPES	5
+#define SGX_PAGE_TYPE_MASK	GENMASK(7, 0)
+
+/**
+ * enum sgx_secinfo_flags - the flags field in &struct sgx_secinfo
+ * @SGX_SECINFO_R:	allow read
+ * @SGX_SECINFO_W:	allow write
+ * @SGX_SECINFO_X:	allow execution
+ * @SGX_SECINFO_SECS:	a SECS page
+ * @SGX_SECINFO_TCS:	a TCS page
+ * @SGX_SECINFO_REG:	a regular page
+ * @SGX_SECINFO_VA:	a VA page
+ * @SGX_SECINFO_TRIM:	a page in trimmed state
+ */
+enum sgx_secinfo_flags {
+	SGX_SECINFO_R		= BIT(0),
+	SGX_SECINFO_W		= BIT(1),
+	SGX_SECINFO_X		= BIT(2),
+	SGX_SECINFO_SECS	= (SGX_PAGE_TYPE_SECS << 8),
+	SGX_SECINFO_TCS		= (SGX_PAGE_TYPE_TCS << 8),
+	SGX_SECINFO_REG		= (SGX_PAGE_TYPE_REG << 8),
+	SGX_SECINFO_VA		= (SGX_PAGE_TYPE_VA << 8),
+	SGX_SECINFO_TRIM	= (SGX_PAGE_TYPE_TRIM << 8),
+};
+
+#define SGX_SECINFO_PERMISSION_MASK	GENMASK_ULL(2, 0)
+#define SGX_SECINFO_PAGE_TYPE_MASK	(SGX_PAGE_TYPE_MASK << 8)
+#define SGX_SECINFO_RESERVED_MASK	~(SGX_SECINFO_PERMISSION_MASK | \
+					  SGX_SECINFO_PAGE_TYPE_MASK)
+
+/**
+ * struct sgx_secinfo - describes attributes of an EPC page
+ * @flags:	permissions and type
+ *
+ * Used together with ENCLS leaves that add or modify an EPC page to an
+ * enclave to define page permissions and type.
+ */
+struct sgx_secinfo {
+	u64 flags;
+	u8  reserved[56];
+} __packed __aligned(64);
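+
+/*
+ * Illustrative sketch (not part of this patch): ENCLS callers compose the
+ * SECINFO flags for a page from a permission mask plus a page type from the
+ * enums above, e.g. for a regular read/write page:
+ *
+ *	struct sgx_secinfo secinfo = {
+ *		.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_REG,
+ *	};
+ */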
+
+#define SGX_PCMD_RESERVED_SIZE 40
+
+/**
+ * struct sgx_pcmd - Paging Crypto Metadata (PCMD)
+ * @enclave_id:	enclave identifier
+ * @mac:	MAC over PCMD, page contents and isvsvn
+ *
+ * A PCMD is stored in regular memory for every swapped page. When ELDU loads
+ * the page back, it recalculates the MAC using an isvsvn number stored in a
+ * VA page. Together these two structures bring integrity and rollback
+ * protection.
+ */
+struct sgx_pcmd {
+	struct sgx_secinfo secinfo;
+	u64 enclave_id;
+	u8  reserved[SGX_PCMD_RESERVED_SIZE];
+	u8  mac[16];
+} __packed __aligned(128);
+
+#define SGX_SIGSTRUCT_RESERVED1_SIZE 84
+#define SGX_SIGSTRUCT_RESERVED2_SIZE 20
+#define SGX_SIGSTRUCT_RESERVED3_SIZE 32
+#define SGX_SIGSTRUCT_RESERVED4_SIZE 12
+
+/**
+ * struct sgx_sigstruct_header - defines author of the enclave
+ * @header1:	constant byte string
+ * @vendor:	must be either 0x0000 or 0x8086
+ * @date:	YYYYMMDD in BCD
+ * @header2:	constant byte string
+ * @swdefined:	software defined value
+ */
+struct sgx_sigstruct_header {
+	u64 header1[2];
+	u32 vendor;
+	u32 date;
+	u64 header2[2];
+	u32 swdefined;
+	u8  reserved1[SGX_SIGSTRUCT_RESERVED1_SIZE];
+} __packed;
+
+/**
+ * struct sgx_sigstruct_body - defines contents of the enclave
+ * @miscselect:		additional information stored to an SSA frame
+ * @misc_mask:		required miscselect in SECS
+ * @attributes:		attributes for enclave
+ * @xfrm:		XSave-Feature Request Mask (subset of XCR0)
+ * @attributes_mask:	required attributes in SECS
+ * @xfrm_mask:		required XFRM in SECS
+ * @mrenclave:		SHA256-hash of the enclave contents
+ * @isvprodid:		a user-defined value that is used in key derivation
+ * @isvsvn:		a user-defined value that is used in key derivation
+ */
+struct sgx_sigstruct_body {
+	u32 miscselect;
+	u32 misc_mask;
+	u8  reserved2[SGX_SIGSTRUCT_RESERVED2_SIZE];
+	u64 attributes;
+	u64 xfrm;
+	u64 attributes_mask;
+	u64 xfrm_mask;
+	u8  mrenclave[32];
+	u8  reserved3[SGX_SIGSTRUCT_RESERVED3_SIZE];
+	u16 isvprodid;
+	u16 isvsvn;
+} __packed;
+
+/**
+ * struct sgx_sigstruct - an enclave signature
+ * @header:	defines author of the enclave
+ * @modulus:	the modulus of the public key
+ * @exponent:	the exponent of the public key
+ * @signature:	the signature calculated over the signed fields, i.e. header
+ *		and body
+ * @body:	defines contents of the enclave
+ * @q1:		a value used in RSA signature verification
+ * @q2:		a value used in RSA signature verification
+ *
+ * Header and body are the parts that are actually signed. The remaining
+ * fields define the signature of the enclave.
+ */
+struct sgx_sigstruct {
+	struct sgx_sigstruct_header header;
+	u8  modulus[SGX_MODULUS_SIZE];
+	u32 exponent;
+	u8  signature[SGX_MODULUS_SIZE];
+	struct sgx_sigstruct_body body;
+	u8  reserved4[SGX_SIGSTRUCT_RESERVED4_SIZE];
+	u8  q1[SGX_MODULUS_SIZE];
+	u8  q2[SGX_MODULUS_SIZE];
+} __packed;
+
+#define SGX_LAUNCH_TOKEN_SIZE 304
+
+/*
+ * Do not put any hardware-defined SGX structure representations below this
+ * comment!
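+ *
+ * Illustrative use only (not part of this patch): a caller granting the
+ * privileged PROVISIONKEY attribute via an opened provision device fd
+ * would use the helper declared below, roughly as
+ *
+ *	ret = sgx_set_attribute(&allowed_attributes, attribute_fd);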
+ */
+
+#ifdef CONFIG_X86_SGX_KVM
+int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
+		     int *trapnr);
+int sgx_virt_einit(void __user *sigstruct, void __user *token,
+		   void __user *secs, u64 *lepubkeyhash, int *trapnr);
+#endif
+
+int sgx_set_attribute(unsigned long *allowed_attributes,
+		      unsigned int attribute_fd);
+
+#endif /* _ASM_X86_SGX_H */
diff --git a/arch/x86/include/asm/shared/io.h b/arch/x86/include/asm/shared/io.h
new file mode 100644
index 000000000000..8009d781c2f9
--- /dev/null
+++ b/arch/x86/include/asm/shared/io.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHARED_IO_H
+#define _ASM_X86_SHARED_IO_H
+
+#include <linux/types.h>
+
+#define BUILDIO(bwl, bw, type)						\
+static __always_inline void __out##bwl(type value, u16 port)		\
+{									\
+	asm volatile("out" #bwl " %" #bw "0, %w1"			\
+		     : : "a"(value), "Nd"(port));			\
+}									\
+									\
+static __always_inline type __in##bwl(u16 port)				\
+{									\
+	type value;							\
+	asm volatile("in" #bwl " %w1, %" #bw "0"			\
+		     : "=a"(value) : "Nd"(port));			\
+	return value;							\
+}
+
+BUILDIO(b, b, u8)
+BUILDIO(w, w, u16)
+BUILDIO(l, , u32)
+#undef BUILDIO
+
+#define inb __inb
+#define inw __inw
+#define inl __inl
+#define outb __outb
+#define outw __outw
+#define outl __outl
+
+#endif
diff --git a/arch/x86/include/asm/shared/msr.h b/arch/x86/include/asm/shared/msr.h
new file mode 100644
index 000000000000..a20b1c08c99f
--- /dev/null
+++ b/arch/x86/include/asm/shared/msr.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHARED_MSR_H
+#define _ASM_X86_SHARED_MSR_H
+
+struct msr {
+	union {
+		struct {
+			u32 l;
+			u32 h;
+		};
+		u64 q;
+	};
+};
+
+/*
+ * The kernel proper already defines rdmsr()/wrmsr(), but they are not usable
+ * in the boot kernel, since they rely on tracepoint/exception handling
+ * infrastructure that's not available here.
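+ *
+ * Illustrative use only (not part of this patch): boot code reading EFER
+ * would do
+ *
+ *	struct msr m;
+ *
+ *	raw_rdmsr(MSR_EFER, &m);
+ *
+ * and then consume either the m.l/m.h halves or the combined m.q value.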
+ */ +static inline void raw_rdmsr(unsigned int reg, struct msr *m) +{ + asm volatile("rdmsr" : "=a" (m->l), "=d" (m->h) : "c" (reg)); +} + +static inline void raw_wrmsr(unsigned int reg, const struct msr *m) +{ + asm volatile("wrmsr" : : "c" (reg), "a"(m->l), "d" (m->h) : "memory"); +} + +#endif /* _ASM_X86_SHARED_MSR_H */ diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h new file mode 100644 index 000000000000..8bc074c8d7c6 --- /dev/null +++ b/arch/x86/include/asm/shared/tdx.h @@ -0,0 +1,191 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SHARED_TDX_H +#define _ASM_X86_SHARED_TDX_H + +#include <linux/bits.h> +#include <linux/types.h> + +#define TDX_HYPERCALL_STANDARD 0 + +#define TDX_CPUID_LEAF_ID 0x21 +#define TDX_IDENT "IntelTDX " + +/* TDX module Call Leaf IDs */ +#define TDG_VP_VMCALL 0 +#define TDG_VP_INFO 1 +#define TDG_MR_RTMR_EXTEND 2 +#define TDG_VP_VEINFO_GET 3 +#define TDG_MR_REPORT 4 +#define TDG_MEM_PAGE_ACCEPT 6 +#define TDG_VM_RD 7 +#define TDG_VM_WR 8 + +/* TDX attributes */ +#define TDX_ATTR_DEBUG_BIT 0 +#define TDX_ATTR_DEBUG BIT_ULL(TDX_ATTR_DEBUG_BIT) +#define TDX_ATTR_HGS_PLUS_PROF_BIT 4 +#define TDX_ATTR_HGS_PLUS_PROF BIT_ULL(TDX_ATTR_HGS_PLUS_PROF_BIT) +#define TDX_ATTR_PERF_PROF_BIT 5 +#define TDX_ATTR_PERF_PROF BIT_ULL(TDX_ATTR_PERF_PROF_BIT) +#define TDX_ATTR_PMT_PROF_BIT 6 +#define TDX_ATTR_PMT_PROF BIT_ULL(TDX_ATTR_PMT_PROF_BIT) +#define TDX_ATTR_ICSSD_BIT 16 +#define TDX_ATTR_ICSSD BIT_ULL(TDX_ATTR_ICSSD_BIT) +#define TDX_ATTR_LASS_BIT 27 +#define TDX_ATTR_LASS BIT_ULL(TDX_ATTR_LASS_BIT) +#define TDX_ATTR_SEPT_VE_DISABLE_BIT 28 +#define TDX_ATTR_SEPT_VE_DISABLE BIT_ULL(TDX_ATTR_SEPT_VE_DISABLE_BIT) +#define TDX_ATTR_MIGRTABLE_BIT 29 +#define TDX_ATTR_MIGRTABLE BIT_ULL(TDX_ATTR_MIGRTABLE_BIT) +#define TDX_ATTR_PKS_BIT 30 +#define TDX_ATTR_PKS BIT_ULL(TDX_ATTR_PKS_BIT) +#define TDX_ATTR_KL_BIT 31 +#define TDX_ATTR_KL BIT_ULL(TDX_ATTR_KL_BIT) +#define TDX_ATTR_TPA_BIT 62 +#define TDX_ATTR_TPA BIT_ULL(TDX_ATTR_TPA_BIT) +#define TDX_ATTR_PERFMON_BIT 63 +#define TDX_ATTR_PERFMON BIT_ULL(TDX_ATTR_PERFMON_BIT) + +/* TDX TD-Scope Metadata. 
To be used by TDG.VM.WR and TDG.VM.RD */ +#define TDCS_CONFIG_FLAGS 0x1110000300000016 +#define TDCS_TD_CTLS 0x1110000300000017 +#define TDCS_NOTIFY_ENABLES 0x9100000000000010 +#define TDCS_TOPOLOGY_ENUM_CONFIGURED 0x9100000000000019 + +/* TDCS_CONFIG_FLAGS bits */ +#define TDCS_CONFIG_FLEXIBLE_PENDING_VE BIT_ULL(1) + +/* TDCS_TD_CTLS bits */ +#define TD_CTLS_PENDING_VE_DISABLE_BIT 0 +#define TD_CTLS_PENDING_VE_DISABLE BIT_ULL(TD_CTLS_PENDING_VE_DISABLE_BIT) +#define TD_CTLS_ENUM_TOPOLOGY_BIT 1 +#define TD_CTLS_ENUM_TOPOLOGY BIT_ULL(TD_CTLS_ENUM_TOPOLOGY_BIT) +#define TD_CTLS_VIRT_CPUID2_BIT 2 +#define TD_CTLS_VIRT_CPUID2 BIT_ULL(TD_CTLS_VIRT_CPUID2_BIT) +#define TD_CTLS_REDUCE_VE_BIT 3 +#define TD_CTLS_REDUCE_VE BIT_ULL(TD_CTLS_REDUCE_VE_BIT) +#define TD_CTLS_LOCK_BIT 63 +#define TD_CTLS_LOCK BIT_ULL(TD_CTLS_LOCK_BIT) + +/* TDX hypercall Leaf IDs */ +#define TDVMCALL_GET_TD_VM_CALL_INFO 0x10000 +#define TDVMCALL_MAP_GPA 0x10001 +#define TDVMCALL_GET_QUOTE 0x10002 +#define TDVMCALL_REPORT_FATAL_ERROR 0x10003 +#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004ULL + +/* + * TDG.VP.VMCALL Status Codes (returned in R10) + */ +#define TDVMCALL_STATUS_SUCCESS 0x0000000000000000ULL +#define TDVMCALL_STATUS_RETRY 0x0000000000000001ULL +#define TDVMCALL_STATUS_INVALID_OPERAND 0x8000000000000000ULL +#define TDVMCALL_STATUS_ALIGN_ERROR 0x8000000000000002ULL +#define TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED 0x8000000000000003ULL + +/* + * Bitmasks of exposed registers (with VMM). + */ +#define TDX_RDX BIT(2) +#define TDX_RBX BIT(3) +#define TDX_RSI BIT(6) +#define TDX_RDI BIT(7) +#define TDX_R8 BIT(8) +#define TDX_R9 BIT(9) +#define TDX_R10 BIT(10) +#define TDX_R11 BIT(11) +#define TDX_R12 BIT(12) +#define TDX_R13 BIT(13) +#define TDX_R14 BIT(14) +#define TDX_R15 BIT(15) + +/* + * These registers are clobbered to hold arguments for each + * TDVMCALL. They are safe to expose to the VMM. + * Each bit in this mask represents a register ID. Bit field + * details can be found in TDX GHCI specification, section + * titled "TDCALL [TDG.VP.VMCALL] leaf". + */ +#define TDVMCALL_EXPOSE_REGS_MASK \ + (TDX_RDX | TDX_RBX | TDX_RSI | TDX_RDI | TDX_R8 | TDX_R9 | \ + TDX_R10 | TDX_R11 | TDX_R12 | TDX_R13 | TDX_R14 | TDX_R15) + +/* TDX supported page sizes from the TDX module ABI. */ +#define TDX_PS_4K 0 +#define TDX_PS_2M 1 +#define TDX_PS_1G 2 +#define TDX_PS_NR (TDX_PS_1G + 1) + +#ifndef __ASSEMBLER__ + +#include <linux/compiler_attributes.h> + +/* + * Used in __tdcall*() to gather the input/output registers' values of the + * TDCALL instruction when requesting services from the TDX module. This is a + * software only structure and not part of the TDX module/VMM ABI + */ +struct tdx_module_args { + /* callee-clobbered */ + u64 rcx; + u64 rdx; + u64 r8; + u64 r9; + /* extra callee-clobbered */ + u64 r10; + u64 r11; + /* callee-saved + rdi/rsi */ + u64 r12; + u64 r13; + u64 r14; + u64 r15; + u64 rbx; + u64 rdi; + u64 rsi; +}; + +/* Used to communicate with the TDX module */ +u64 __tdcall(u64 fn, struct tdx_module_args *args); +u64 __tdcall_ret(u64 fn, struct tdx_module_args *args); +u64 __tdcall_saved_ret(u64 fn, struct tdx_module_args *args); + +/* Used to request services from the VMM */ +u64 __tdx_hypercall(struct tdx_module_args *args); + +/* + * Wrapper for standard use of __tdx_hypercall with no output aside from + * return code. 
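+ *
+ * Illustrative use only (not part of this patch): a MapGPA conversion of a
+ * guest-physical range would look roughly like
+ *
+ *	ret = _tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0);
+ *
+ * where start/end carry the shared bit as needed, and a non-zero R10 status
+ * (the function's return value) indicates failure.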
+ */ +static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15) +{ + struct tdx_module_args args = { + .r10 = TDX_HYPERCALL_STANDARD, + .r11 = fn, + .r12 = r12, + .r13 = r13, + .r14 = r14, + .r15 = r15, + }; + + return __tdx_hypercall(&args); +} + + +/* Called from __tdx_hypercall() for unrecoverable failure */ +void __noreturn __tdx_hypercall_failed(void); + +bool tdx_accept_memory(phys_addr_t start, phys_addr_t end); + +/* + * The TDG.VP.VMCALL-Instruction-execution sub-functions are defined + * independently from but are currently matched 1:1 with VMX EXIT_REASONs. + * Reusing the KVM EXIT_REASON macros makes it easier to connect the host and + * guest sides of these calls. + */ +static __always_inline u64 hcall_func(u64 exit_reason) +{ + return exit_reason; +} + +#endif /* !__ASSEMBLER__ */ +#endif /* _ASM_X86_SHARED_TDX_H */ diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h new file mode 100644 index 000000000000..fc7dcec58fd4 --- /dev/null +++ b/arch/x86/include/asm/shstk.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SHSTK_H +#define _ASM_X86_SHSTK_H + +#ifndef __ASSEMBLER__ +#include <linux/types.h> + +struct task_struct; +struct ksignal; + +#ifdef CONFIG_X86_USER_SHADOW_STACK +struct thread_shstk { + u64 base; + u64 size; +}; + +long shstk_prctl(struct task_struct *task, int option, unsigned long arg2); +void reset_thread_features(void); +unsigned long shstk_alloc_thread_stack(struct task_struct *p, u64 clone_flags, + unsigned long stack_size); +void shstk_free(struct task_struct *p); +int setup_signal_shadow_stack(struct ksignal *ksig); +int restore_signal_shadow_stack(void); +int shstk_update_last_frame(unsigned long val); +bool shstk_is_enabled(void); +int shstk_pop(u64 *val); +int shstk_push(u64 val); +#else +static inline long shstk_prctl(struct task_struct *task, int option, + unsigned long arg2) { return -EINVAL; } +static inline void reset_thread_features(void) {} +static inline unsigned long shstk_alloc_thread_stack(struct task_struct *p, + u64 clone_flags, + unsigned long stack_size) { return 0; } +static inline void shstk_free(struct task_struct *p) {} +static inline int setup_signal_shadow_stack(struct ksignal *ksig) { return 0; } +static inline int restore_signal_shadow_stack(void) { return 0; } +static inline int shstk_update_last_frame(unsigned long val) { return 0; } +static inline bool shstk_is_enabled(void) { return false; } +static inline int shstk_pop(u64 *val) { return -ENOTSUPP; } +static inline int shstk_push(u64 val) { return -ENOTSUPP; } +#endif /* CONFIG_X86_USER_SHADOW_STACK */ + +#endif /* __ASSEMBLER__ */ + +#endif /* _ASM_X86_SHSTK_H */ diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h index f176114c04d4..84eab2724875 100644 --- a/arch/x86/include/asm/sigframe.h +++ b/arch/x86/include/asm/sigframe.h @@ -33,11 +33,7 @@ struct sigframe_ia32 { * legacy application accessing/modifying it. 
*/ struct _fpstate_32 fpstate_unused; -#ifdef CONFIG_IA32_EMULATION - unsigned int extramask[_COMPAT_NSIG_WORDS-1]; -#else /* !CONFIG_IA32_EMULATION */ - unsigned long extramask[_NSIG_WORDS-1]; -#endif /* CONFIG_IA32_EMULATION */ + unsigned int extramask[1]; char retcode[8]; /* fp state follows here */ }; diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index 2fcbd6f33ef7..8727c7e21dd1 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h @@ -14,12 +14,36 @@ X86_EFLAGS_CF | X86_EFLAGS_RF) void signal_fault(struct pt_regs *regs, void __user *frame, char *where); -int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, - struct pt_regs *regs, unsigned long mask); +void __user * +get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size, + void __user **fpstate); -#ifdef CONFIG_X86_X32_ABI -asmlinkage long sys32_x32_rt_sigreturn(void); +int ia32_setup_frame(struct ksignal *ksig, struct pt_regs *regs); +int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); +int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); +int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); + +/* + * To prevent immediate repeat of single step trap on return from SIGTRAP + * handler if the trap flag (TF) is set without an external debugger attached, + * clear the software event flag in the augmented SS, ensuring no single-step + * trap is pending upon ERETU completion. + * + * Note, this function should be called in sigreturn() before the original + * state is restored to make sure the TF is read from the entry frame. + */ +static __always_inline void prevent_single_step_upon_eretu(struct pt_regs *regs) +{ + /* + * If the trap flag (TF) is set, i.e., the sigreturn() SYSCALL instruction + * is being single-stepped, do not clear the software event flag in the + * augmented SS, thus a debugger won't skip over the following instruction. 
+ */ +#ifdef CONFIG_X86_FRED + if (!(regs->flags & X86_EFLAGS_TF)) + regs->fred_ss.swevent = 0; #endif +} #endif /* _ASM_X86_SIGHANDLING_H */ diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 33d3c88a7225..5c03aaa89014 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_SIGNAL_H #define _ASM_X86_SIGNAL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/linkage.h> /* Most things should be clean enough to redefine this at will, if care @@ -28,14 +28,9 @@ typedef struct { #define SA_IA32_ABI 0x02000000u #define SA_X32_ABI 0x01000000u -#ifndef CONFIG_COMPAT -typedef sigset_t compat_sigset_t; -#endif - -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #include <uapi/asm/signal.h> -#ifndef __ASSEMBLY__ -extern void do_signal(struct pt_regs *regs); +#ifndef __ASSEMBLER__ #define __ARCH_HAS_SA_RESTORER @@ -88,8 +83,7 @@ static inline int __const_sigismember(sigset_t *set, int _sig) static inline int __gen_sigismember(sigset_t *set, int _sig) { bool ret; - asm("btl %2,%1" CC_SET(c) - : CC_OUT(c) (ret) : "m"(*set), "Ir"(_sig-1)); + asm("btl %2,%1" : "=@ccc"(ret) : "m"(*set), "Ir"(_sig-1)); return ret; } @@ -106,5 +100,5 @@ struct pt_regs; #endif /* !__i386__ */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_SIGNAL_H */ diff --git a/arch/x86/include/asm/simd.h b/arch/x86/include/asm/simd.h index a341c878e977..b8027b63cd7a 100644 --- a/arch/x86/include/asm/simd.h +++ b/arch/x86/include/asm/simd.h @@ -1,6 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SIMD_H +#define _ASM_SIMD_H #include <asm/fpu/api.h> +#include <linux/compiler_attributes.h> +#include <linux/types.h> /* * may_use_simd - whether it is allowable at this time to issue SIMD @@ -10,3 +14,5 @@ static __must_check inline bool may_use_simd(void) { return irq_fpu_usable(); } + +#endif /* _ASM_SIMD_H */ diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index db333300bd4b..977bef14a0ab 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -1,79 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Supervisor Mode Access Prevention support * * Copyright (C) 2012 Intel Corporation * Author: H. Peter Anvin <hpa@linux.intel.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. */ #ifndef _ASM_X86_SMAP_H #define _ASM_X86_SMAP_H -#include <linux/stringify.h> #include <asm/nops.h> #include <asm/cpufeatures.h> +#include <asm/alternative.h> -/* "Raw" instruction opcodes */ -#define __ASM_CLAC .byte 0x0f,0x01,0xca -#define __ASM_STAC .byte 0x0f,0x01,0xcb - -#ifdef __ASSEMBLY__ - -#include <asm/alternative-asm.h> - -#ifdef CONFIG_X86_SMAP +#ifdef __ASSEMBLER__ #define ASM_CLAC \ - ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP + ALTERNATIVE "", "clac", X86_FEATURE_SMAP #define ASM_STAC \ - ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP + ALTERNATIVE "", "stac", X86_FEATURE_SMAP -#else /* CONFIG_X86_SMAP */ +#else /* __ASSEMBLER__ */ -#define ASM_CLAC -#define ASM_STAC +/* + * The CLAC/STAC instructions toggle the enforcement of + * X86_FEATURE_SMAP along with X86_FEATURE_LASS. + * + * SMAP enforcement is based on the _PAGE_BIT_USER bit in the page + * tables. The kernel is not allowed to touch pages with that bit set + * unless the AC bit is set. 
+ * + * Use stac()/clac() when accessing userspace (_PAGE_USER) mappings, + * regardless of location. + * + * Note: a barrier is implicit in alternative(). + */ -#endif /* CONFIG_X86_SMAP */ +static __always_inline void clac(void) +{ + alternative("", "clac", X86_FEATURE_SMAP); +} -#else /* __ASSEMBLY__ */ +static __always_inline void stac(void) +{ + alternative("", "stac", X86_FEATURE_SMAP); +} -#include <asm/alternative.h> +/* + * LASS enforcement is based on bit 63 of the virtual address. The + * kernel is not allowed to touch memory in the lower half of the + * virtual address space. + * + * Use lass_stac()/lass_clac() to toggle the AC bit for kernel data + * accesses (!_PAGE_USER) that are blocked by LASS, but not by SMAP. + * + * Even with the AC bit set, LASS will continue to block instruction + * fetches from the user half of the address space. To allow those, + * clear CR4.LASS to disable the LASS mechanism entirely. + * + * Note: a barrier is implicit in alternative(). + */ -#ifdef CONFIG_X86_SMAP +static __always_inline void lass_clac(void) +{ + alternative("", "clac", X86_FEATURE_LASS); +} -static __always_inline void clac(void) +static __always_inline void lass_stac(void) { - /* Note: a barrier is implicit in alternative() */ - alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP); + alternative("", "stac", X86_FEATURE_LASS); } -static __always_inline void stac(void) +static __always_inline unsigned long smap_save(void) { - /* Note: a barrier is implicit in alternative() */ - alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP); + unsigned long flags; + + asm volatile ("# smap_save\n\t" + ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "\n\t" + "", "pushf; pop %0; clac", + X86_FEATURE_SMAP) + : "=rm" (flags) : : "memory", "cc"); + + return flags; +} + +static __always_inline void smap_restore(unsigned long flags) +{ + asm volatile ("# smap_restore\n\t" + ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "\n\t" + "", "push %0; popf", + X86_FEATURE_SMAP) + : : "g" (flags) : "memory", "cc"); } /* These macros can be used in asm() statements */ #define ASM_CLAC \ - ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP) + ALTERNATIVE("", "clac", X86_FEATURE_SMAP) #define ASM_STAC \ - ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP) - -#else /* CONFIG_X86_SMAP */ - -static inline void clac(void) { } -static inline void stac(void) { } - -#define ASM_CLAC -#define ASM_STAC + ALTERNATIVE("", "stac", X86_FEATURE_SMAP) -#endif /* CONFIG_X86_SMAP */ +#define ASM_CLAC_UNSAFE \ + ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "\n\t" "clac", X86_FEATURE_SMAP) +#define ASM_STAC_UNSAFE \ + ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "\n\t" "stac", X86_FEATURE_SMAP) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_SMAP_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 2e95b6c1bca3..84951572ab81 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -1,44 +1,23 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_SMP_H #define _ASM_X86_SMP_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/cpumask.h> -#include <asm/percpu.h> +#include <linux/thread_info.h> -/* - * We need the APIC definitions automatically as part of 'smp.h' - */ -#ifdef CONFIG_X86_LOCAL_APIC -# include <asm/mpspec.h> -# include <asm/apic.h> -# ifdef CONFIG_X86_IO_APIC -# include <asm/io_apic.h> -# endif -#endif -#include <asm/thread_info.h> #include <asm/cpumask.h> -extern int smp_num_siblings; -extern unsigned int num_processors; 
+DECLARE_PER_CPU_CACHE_HOT(int, cpu_number); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); +DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); /* cpus sharing the last level cache: */ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); -DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); -DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); +DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map); -static inline struct cpumask *cpu_llc_shared_mask(int cpu) -{ - return per_cpu(cpu_llc_shared_map, cpu); -} - -DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); +DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid); -DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) -DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid); -#endif struct task_struct; @@ -51,10 +30,13 @@ struct smp_ops { void (*crash_stop_other_cpus)(void); void (*smp_send_reschedule)(int cpu); - int (*cpu_up)(unsigned cpu, struct task_struct *tidle); + void (*cleanup_dead_cpu)(unsigned cpu); + void (*poll_sync_state)(void); + int (*kick_ap_alive)(unsigned cpu, struct task_struct *tidle); int (*cpu_disable)(void); void (*cpu_die)(unsigned int cpu); void (*play_dead)(void); + void (*stop_this_cpu)(void); void (*send_call_func_ipi)(const struct cpumask *mask); void (*send_call_func_single_ipi)(int cpu); @@ -76,11 +58,6 @@ static inline void stop_other_cpus(void) smp_ops.stop_other_cpus(1); } -static inline void smp_prepare_boot_cpu(void) -{ - smp_ops.smp_prepare_boot_cpu(); -} - static inline void smp_prepare_cpus(unsigned int max_cpus) { smp_ops.smp_prepare_cpus(max_cpus); @@ -91,11 +68,6 @@ static inline void smp_cpus_done(unsigned int max_cpus) smp_ops.smp_cpus_done(max_cpus); } -static inline int __cpu_up(unsigned int cpu, struct task_struct *tidle) -{ - return smp_ops.cpu_up(cpu, tidle); -} - static inline int __cpu_disable(void) { return smp_ops.cpu_disable(); @@ -103,15 +75,17 @@ static inline int __cpu_disable(void) static inline void __cpu_die(unsigned int cpu) { - smp_ops.cpu_die(cpu); + if (smp_ops.cpu_die) + smp_ops.cpu_die(cpu); } -static inline void play_dead(void) +static inline void __noreturn play_dead(void) { smp_ops.play_dead(); + BUG(); } -static inline void smp_send_reschedule(int cpu) +static inline void arch_smp_send_reschedule(int cpu) { smp_ops.smp_send_reschedule(cpu); } @@ -128,26 +102,27 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) void cpu_disable_common(void); void native_smp_prepare_boot_cpu(void); +void smp_prepare_cpus_common(void); void native_smp_prepare_cpus(unsigned int max_cpus); -void calculate_max_logical_packages(void); void native_smp_cpus_done(unsigned int max_cpus); -void common_cpu_up(unsigned int cpunum, struct task_struct *tidle); -int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); +int common_cpu_up(unsigned int cpunum, struct task_struct *tidle); +int native_kick_ap(unsigned int cpu, struct task_struct *tidle); int native_cpu_disable(void); -int common_cpu_die(unsigned int cpu); -void native_cpu_die(unsigned int cpu); -void hlt_play_dead(void); -void native_play_dead(void); +void __noreturn hlt_play_dead(void); +void __noreturn native_play_dead(void); void play_dead_common(void); void wbinvd_on_cpu(int cpu); -int wbinvd_on_all_cpus(void); +void wbinvd_on_all_cpus(void); +void 
wbinvd_on_cpus_mask(struct cpumask *cpus); +void wbnoinvd_on_all_cpus(void); +void wbnoinvd_on_cpus_mask(struct cpumask *cpus); + +void smp_kick_mwait_play_dead(void); +void __noreturn mwait_play_dead(unsigned int eax_hint); +void native_smp_send_reschedule(int cpu); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); -void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); - -void smp_store_boot_cpu_info(void); -void smp_store_cpu_info(int id); asmlinkage __visible void smp_reboot_interrupt(void); __visible void smp_reschedule_interrupt(struct pt_regs *regs); @@ -159,34 +134,50 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r); /* * This function is needed by all SMP systems. It must _always_ be valid - * from the initial startup. We map APIC_BASE very early in page_setup(), - * so this is correct in the x86 case. + * from the initial startup. */ -#define raw_smp_processor_id() (this_cpu_read(cpu_number)) +#define raw_smp_processor_id() this_cpu_read(cpu_number) +#define __smp_processor_id() __this_cpu_read(cpu_number) -#ifdef CONFIG_X86_32 -extern int safe_smp_processor_id(void); -#else -# define safe_smp_processor_id() smp_processor_id() -#endif +static inline struct cpumask *cpu_llc_shared_mask(int cpu) +{ + return per_cpu(cpu_llc_shared_map, cpu); +} + +static inline struct cpumask *cpu_l2c_shared_mask(int cpu) +{ + return per_cpu(cpu_l2c_shared_map, cpu); +} #else /* !CONFIG_SMP */ #define wbinvd_on_cpu(cpu) wbinvd() -static inline int wbinvd_on_all_cpus(void) +static inline void wbinvd_on_all_cpus(void) { wbinvd(); - return 0; } -#endif /* CONFIG_SMP */ -extern unsigned disabled_cpus; +static inline void wbinvd_on_cpus_mask(struct cpumask *cpus) +{ + wbinvd(); +} + +static inline void wbnoinvd_on_all_cpus(void) +{ + wbnoinvd(); +} + +static inline void wbnoinvd_on_cpus_mask(struct cpumask *cpus) +{ + wbnoinvd(); +} -#ifdef CONFIG_X86_LOCAL_APIC -extern int hard_smp_processor_id(void); +static inline struct cpumask *cpu_llc_shared_mask(int cpu) +{ + return (struct cpumask *)cpumask_of(0); +} -#else /* CONFIG_X86_LOCAL_APIC */ -#define hard_smp_processor_id() 0 -#endif /* CONFIG_X86_LOCAL_APIC */ +static inline void __noreturn mwait_play_dead(unsigned int eax_hint) { BUG(); } +#endif /* CONFIG_SMP */ #ifdef CONFIG_DEBUG_NMI_SELFTEST extern void nmi_selftest(void); @@ -194,5 +185,15 @@ extern void nmi_selftest(void); #define nmi_selftest() do { } while (0) #endif -#endif /* __ASSEMBLY__ */ +extern unsigned int smpboot_control; +extern unsigned long apic_mmio_base; + +#endif /* !__ASSEMBLER__ */ + +/* Control bits for startup_64 */ +#define STARTUP_READ_APICID 0x80000000 + +/* Top 8 bits are reserved for control */ +#define STARTUP_PARALLEL_MASK 0xFF000000 + #endif /* _ASM_X86_SMP_H */ diff --git a/arch/x86/include/asm/softirq_stack.h b/arch/x86/include/asm/softirq_stack.h new file mode 100644 index 000000000000..889d53d6a0e1 --- /dev/null +++ b/arch/x86/include/asm/softirq_stack.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SOFTIRQ_STACK_H +#define _ASM_X86_SOFTIRQ_STACK_H + +#ifdef CONFIG_X86_64 +# include <asm/irq_stack.h> +#else +# include <asm-generic/softirq_stack.h> +#endif + +#endif diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index 199218719a86..3918c7a434f5 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_SPARSEMEM_H #define _ASM_X86_SPARSEMEM_H 
+#include <linux/types.h> + #ifdef CONFIG_SPARSEMEM /* * generic non-linear memory support: @@ -10,26 +12,23 @@ * field of the struct page * * SECTION_SIZE_BITS 2^n: size of each section - * MAX_PHYSADDR_BITS 2^n: max size of physical address space - * MAX_PHYSMEM_BITS 2^n: how much memory we can have in that space + * MAX_PHYSMEM_BITS 2^n: max size of physical address space * */ #ifdef CONFIG_X86_32 # ifdef CONFIG_X86_PAE # define SECTION_SIZE_BITS 29 -# define MAX_PHYSADDR_BITS 36 # define MAX_PHYSMEM_BITS 36 # else # define SECTION_SIZE_BITS 26 -# define MAX_PHYSADDR_BITS 32 # define MAX_PHYSMEM_BITS 32 # endif #else /* CONFIG_X86_32 */ # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ -# define MAX_PHYSADDR_BITS (pgtable_l5_enabled() ? 52 : 44) # define MAX_PHYSMEM_BITS (pgtable_l5_enabled() ? 52 : 46) #endif #endif /* CONFIG_SPARSEMEM */ + #endif /* _ASM_X86_SPARSEMEM_H */ diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 5393babc0598..00b7e0398210 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -4,6 +4,7 @@ #include <linux/thread_info.h> #include <asm/nospec-branch.h> +#include <asm/msr.h> /* * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR @@ -13,7 +14,7 @@ * Takes the guest view of SPEC_CTRL MSR as a parameter and also * the guest's version of VIRT_SPEC_CTRL, if emulated. */ -extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest); +extern void x86_virt_spec_ctrl(u64 guest_virt_spec_ctrl, bool guest); /** * x86_spec_ctrl_set_guest - Set speculation control registers for the guest @@ -24,9 +25,9 @@ extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bo * Avoids writing to the MSR if the content/bits are the same */ static inline -void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +void x86_spec_ctrl_set_guest(u64 guest_virt_spec_ctrl) { - x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true); + x86_virt_spec_ctrl(guest_virt_spec_ctrl, true); } /** @@ -38,9 +39,9 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) * Avoids writing to the MSR if the content/bits are the same */ static inline -void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +void x86_spec_ctrl_restore_host(u64 guest_virt_spec_ctrl) { - x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false); + x86_virt_spec_ctrl(guest_virt_spec_ctrl, false); } /* AMD specific Speculative Store Bypass MSR data */ @@ -76,6 +77,16 @@ static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; } +/* + * This can be used in noinstr functions & should only be called in bare + * metal context. 
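+ *
+ * Illustrative use only (not part of this patch): an idle path dropping
+ * IBRS before mwait might do
+ *
+ *	__update_spec_ctrl(spec_ctrl & ~SPEC_CTRL_IBRS);
+ *
+ * where spec_ctrl is the current MSR value, so that the per-CPU shadow
+ * stays coherent with the hardware MSR.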
+ */ +static __always_inline void __update_spec_ctrl(u64 val) +{ + __this_cpu_write(x86_spec_ctrl_current, val); + native_wrmsrq(MSR_IA32_SPEC_CTRL, val); +} + #ifdef CONFIG_SMP extern void speculative_store_bypass_ht_init(void); #else @@ -85,4 +96,6 @@ static inline void speculative_store_bypass_ht_init(void) { } extern void speculation_ctrl_update(unsigned long tif); extern void speculation_ctrl_update_current(void); +extern bool itlb_multihit_kvm_mitigation; + #endif diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 43c029cdc3fe..46aa2c9c1bda 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -2,54 +2,45 @@ #ifndef _ASM_X86_SPECIAL_INSNS_H #define _ASM_X86_SPECIAL_INSNS_H - #ifdef __KERNEL__ - #include <asm/nops.h> +#include <asm/processor-flags.h> -/* - * Volatile isn't enough to prevent the compiler from reordering the - * read/write functions for the control registers and messing everything up. - * A memory clobber would solve the problem, but would prevent reordering of - * all loads stores around it, which can hurt performance. Solution is to - * use a variable and mimic reads and writes to it to enforce serialization - */ -extern unsigned long __force_order; +#include <linux/errno.h> +#include <linux/irqflags.h> +#include <linux/jump_label.h> + +void native_write_cr0(unsigned long val); static inline unsigned long native_read_cr0(void) { unsigned long val; - asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order)); + asm volatile("mov %%cr0,%0" : "=r" (val)); return val; } -static inline void native_write_cr0(unsigned long val) -{ - asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order)); -} - -static inline unsigned long native_read_cr2(void) +static __always_inline unsigned long native_read_cr2(void) { unsigned long val; - asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order)); + asm volatile("mov %%cr2,%0" : "=r" (val)); return val; } -static inline void native_write_cr2(unsigned long val) +static __always_inline void native_write_cr2(unsigned long val) { - asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order)); + asm volatile("mov %0,%%cr2": : "r" (val) : "memory"); } -static inline unsigned long __native_read_cr3(void) +static __always_inline unsigned long __native_read_cr3(void) { unsigned long val; - asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order)); + asm volatile("mov %%cr3,%0" : "=r" (val)); return val; } -static inline void native_write_cr3(unsigned long val) +static __always_inline void native_write_cr3(unsigned long val) { - asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order)); + asm volatile("mov %0,%%cr3": : "r" (val) : "memory"); } static inline unsigned long native_read_cr4(void) @@ -64,35 +55,18 @@ static inline unsigned long native_read_cr4(void) asm volatile("1: mov %%cr4, %0\n" "2:\n" _ASM_EXTABLE(1b, 2b) - : "=r" (val), "=m" (__force_order) : "0" (0)); + : "=r" (val) : "0" (0)); #else /* CR4 always exists on x86_64. 
*/ - asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); + asm volatile("mov %%cr4,%0" : "=r" (val)); #endif return val; } -static inline void native_write_cr4(unsigned long val) -{ - asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order)); -} - -#ifdef CONFIG_X86_64 -static inline unsigned long native_read_cr8(void) -{ - unsigned long cr8; - asm volatile("movq %%cr8,%0" : "=r" (cr8)); - return cr8; -} - -static inline void native_write_cr8(unsigned long val) -{ - asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); -} -#endif +void native_write_cr4(unsigned long val); #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -static inline u32 __read_pkru(void) +static inline u32 rdpkru(void) { u32 ecx = 0; u32 edx, pkru; @@ -101,13 +75,11 @@ static inline u32 __read_pkru(void) * "rdpkru" instruction. Places PKRU contents in to EAX, * clears EDX and requires that ecx=0. */ - asm volatile(".byte 0x0f,0x01,0xee\n\t" - : "=a" (pkru), "=d" (edx) - : "c" (ecx)); + asm volatile("rdpkru" : "=a" (pkru), "=d" (edx) : "c" (ecx)); return pkru; } -static inline void __write_pkru(u32 pkru) +static inline void wrpkru(u32 pkru) { u32 ecx = 0, edx = 0; @@ -115,26 +87,51 @@ static inline void __write_pkru(u32 pkru) * "wrpkru" instruction. Loads contents in EAX to PKRU, * requires that ecx = edx = 0. */ - asm volatile(".byte 0x0f,0x01,0xef\n\t" - : : "a" (pkru), "c"(ecx), "d"(edx)); + asm volatile("wrpkru" : : "a" (pkru), "c"(ecx), "d"(edx)); } + #else -static inline u32 __read_pkru(void) +static inline u32 rdpkru(void) { return 0; } -static inline void __write_pkru(u32 pkru) +static inline void wrpkru(u32 pkru) { } #endif -static inline void native_wbinvd(void) +/* + * Write back all modified lines in all levels of cache associated with this + * logical processor to main memory, and then invalidate all caches. Depending + * on the micro-architecture, WBINVD (and WBNOINVD below) may or may not affect + * lower level caches associated with another logical processor that shares any + * level of this processor's cache hierarchy. + */ +static __always_inline void wbinvd(void) { - asm volatile("wbinvd": : :"memory"); + asm volatile("wbinvd" : : : "memory"); } -extern asmlinkage void native_load_gs_index(unsigned); +/* Instruction encoding provided for binutils backwards compatibility. */ +#define ASM_WBNOINVD _ASM_BYTES(0xf3,0x0f,0x09) + +/* + * Write back all modified lines in all levels of cache associated with this + * logical processor to main memory, but do NOT explicitly invalidate caches, + * i.e. leave all/most cache lines in the hierarchy in non-modified state. + */ +static __always_inline void wbnoinvd(void) +{ + /* + * Explicitly encode WBINVD if X86_FEATURE_WBNOINVD is unavailable even + * though WBNOINVD is backwards compatible (it's simply WBINVD with an + * ignored REP prefix), to guarantee that WBNOINVD isn't used if it + * needs to be avoided for any reason. For all supported usage in the + * kernel, WBINVD is functionally a superset of WBNOINVD. 
+ */ + alternative("wbinvd", ASM_WBNOINVD, X86_FEATURE_WBNOINVD); +} static inline unsigned long __read_cr4(void) { @@ -155,12 +152,12 @@ static inline void write_cr0(unsigned long x) native_write_cr0(x); } -static inline unsigned long read_cr2(void) +static __always_inline unsigned long read_cr2(void) { return native_read_cr2(); } -static inline void write_cr2(unsigned long x) +static __always_inline void write_cr2(unsigned long x) { native_write_cr2(x); } @@ -183,62 +180,128 @@ static inline void __write_cr4(unsigned long x) { native_write_cr4(x); } +#endif /* CONFIG_PARAVIRT_XXL */ -static inline void wbinvd(void) +static __always_inline void clflush(volatile void *__p) { - native_wbinvd(); + asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); } -#ifdef CONFIG_X86_64 - -static inline unsigned long read_cr8(void) +static inline void clflushopt(volatile void *__p) { - return native_read_cr8(); + alternative_io("ds clflush %0", + "clflushopt %0", X86_FEATURE_CLFLUSHOPT, + "+m" (*(volatile char __force *)__p)); } -static inline void write_cr8(unsigned long x) +static inline void clwb(volatile void *__p) { - native_write_cr8(x); + volatile struct { char x[64]; } *p = __p; + + asm_inline volatile(ALTERNATIVE_2( + "ds clflush %0", + "clflushopt %0", X86_FEATURE_CLFLUSHOPT, + "clwb %0", X86_FEATURE_CLWB) + : "+m" (*p)); } -static inline void load_gs_index(unsigned selector) +#ifdef CONFIG_X86_USER_SHADOW_STACK +static inline int write_user_shstk_64(u64 __user *addr, u64 val) { - native_load_gs_index(selector); + asm goto("1: wrussq %[val], %[addr]\n" + _ASM_EXTABLE(1b, %l[fail]) + :: [addr] "m" (*addr), [val] "r" (val) + :: fail); + return 0; +fail: + return -EFAULT; } +#endif /* CONFIG_X86_USER_SHADOW_STACK */ -#endif +#define nop() asm volatile ("nop") -#endif /* CONFIG_PARAVIRT_XXL */ +static __always_inline void serialize(void) +{ + /* Instruction opcode for SERIALIZE; supported in binutils >= 2.35. */ + asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory"); +} -static inline void clflush(volatile void *__p) +/* The dst parameter must be 64-bytes aligned */ +static inline void movdir64b(void *dst, const void *src) { - asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); + const struct { char _[64]; } *__src = src; + struct { char _[64]; } *__dst = dst; + + /* + * MOVDIR64B %(rdx), rax. + * + * Both __src and __dst must be memory constraints in order to tell the + * compiler that no other memory accesses should be reordered around + * this one. + * + * Also, both must be supplied as lvalues because this tells + * the compiler what the object is (its size) the instruction accesses. + * I.e., not the pointers but what they point to, thus the deref'ing '*'. + */ + asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02" + : "+m" (*__dst) + : "m" (*__src), "a" (__dst), "d" (__src)); } -static inline void clflushopt(volatile void *__p) +static inline void movdir64b_io(void __iomem *dst, const void *src) { - alternative_io(".byte " __stringify(NOP_DS_PREFIX) "; clflush %P0", - ".byte 0x66; clflush %P0", - X86_FEATURE_CLFLUSHOPT, - "+m" (*(volatile char __force *)__p)); + movdir64b((void __force *)dst, src); } -static inline void clwb(volatile void *__p) +/** + * enqcmds - Enqueue a command in supervisor (CPL0) mode + * @dst: destination, in MMIO space (must be 512-bit aligned) + * @src: 512 bits memory operand + * + * The ENQCMDS instruction allows software to write a 512-bit command to + * a 512-bit-aligned special MMIO region that supports the instruction. 
+ * A return status is loaded into the ZF flag in the RFLAGS register. + * ZF = 0 equates to success, and ZF = 1 indicates retry or error. + * + * This function issues the ENQCMDS instruction to submit data from + * kernel space to MMIO space, in a unit of 512 bits. Order of data access + * is not guaranteed, nor is a memory barrier performed afterwards. It + * returns 0 on success and -EAGAIN on failure. + * + * Warning: Do not use this helper unless your driver has checked that the + * ENQCMDS instruction is supported on the platform and the device accepts + * ENQCMDS. + */ +static inline int enqcmds(void __iomem *dst, const void *src) { - volatile struct { char x[64]; } *p = __p; + const struct { char _[64]; } *__src = src; + struct { char _[64]; } __iomem *__dst = dst; + bool zf; - asm volatile(ALTERNATIVE_2( - ".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])", - ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */ - X86_FEATURE_CLFLUSHOPT, - ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */ - X86_FEATURE_CLWB) - : [p] "+m" (*p) - : [pax] "a" (p)); -} + /* + * ENQCMDS %(rdx), rax + * + * See movdir64b()'s comment on operand specification. + */ + asm volatile(".byte 0xf3, 0x0f, 0x38, 0xf8, 0x02, 0x66, 0x90" + : "=@ccz" (zf), "+m" (*__dst) + : "m" (*__src), "a" (__dst), "d" (__src)); -#define nop() asm volatile ("nop") + /* Submission failure is indicated via EFLAGS.ZF=1 */ + if (zf) + return -EAGAIN; + return 0; +} + +static __always_inline void tile_release(void) +{ + /* + * Instruction opcode for TILERELEASE; supported in binutils + * version >= 2.36. + */ + asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0"); +} #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index bf3e34b25afc..323db6c5852a 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -3,29 +3,7 @@ #define _ASM_X86_SPINLOCK_TYPES_H #include <linux/types.h> - -#ifdef CONFIG_PARAVIRT_SPINLOCKS -#define __TICKET_LOCK_INC 2 -#define TICKET_SLOWPATH_FLAG ((__ticket_t)1) -#else -#define __TICKET_LOCK_INC 1 -#define TICKET_SLOWPATH_FLAG ((__ticket_t)0) -#endif - -#if (CONFIG_NR_CPUS < (256 / __TICKET_LOCK_INC)) -typedef u8 __ticket_t; -typedef u16 __ticketpair_t; -#else -typedef u16 __ticket_t; -typedef u32 __ticketpair_t; -#endif - -#define TICKET_LOCK_INC ((__ticket_t)__TICKET_LOCK_INC) - -#define TICKET_SHIFT (sizeof(__ticket_t) * 8) - #include <asm-generic/qspinlock_types.h> - #include <asm-generic/qrwlock_types.h> #endif /* _ASM_X86_SPINLOCK_TYPES_H */ diff --git a/arch/x86/include/asm/sta2x11.h b/arch/x86/include/asm/sta2x11.h deleted file mode 100644 index e0975e9c4f47..000000000000 --- a/arch/x86/include/asm/sta2x11.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Header file for STMicroelectronics ConneXt (STA2X11) IOHub - */ -#ifndef __ASM_STA2X11_H -#define __ASM_STA2X11_H - -#include <linux/pci.h> - -/* This needs to be called from the MFD to configure its sub-devices */ -struct sta2x11_instance *sta2x11_get_instance(struct pci_dev *pdev); - -#endif /* __ASM_STA2X11_H */ diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 8ec97a62c245..cd761b14eb02 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -2,33 +2,10 @@ /* * GCC stack protector support. 
* - * Stack protector works by putting predefined pattern at the start of + * Stack protector works by putting a predefined pattern at the start of * the stack frame and verifying that it hasn't been overwritten when - * returning from the function. The pattern is called stack canary - * and unfortunately gcc requires it to be at a fixed offset from %gs. - * On x86_64, the offset is 40 bytes and on x86_32 20 bytes. x86_64 - * and x86_32 use segment registers differently and thus handles this - * requirement differently. - * - * On x86_64, %gs is shared by percpu area and stack canary. All - * percpu symbols are zero based and %gs points to the base of percpu - * area. The first occupant of the percpu area is always - * irq_stack_union which contains stack_canary at offset 40. Userland - * %gs is always saved and restored on kernel entry and exit using - * swapgs, so stack protector doesn't add any complexity there. - * - * On x86_32, it's slightly more complicated. As in x86_64, %gs is - * used for userland TLS. Unfortunately, some processors are much - * slower at loading segment registers with different value when - * entering and leaving the kernel, so the kernel uses %fs for percpu - * area and manages %gs lazily so that %gs is switched only when - * necessary, usually during task switch. - * - * As gcc requires the stack canary at %gs:20, %gs can't be managed - * lazily if stack protector is enabled, so the kernel saves and - * restores userland %gs on kernel entry and exit. This behavior is - * controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in - * system.h to hide the details. + * returning from the function. The pattern is called the stack canary + * and is a unique value for each task. */ #ifndef _ASM_STACKPROTECTOR_H @@ -41,85 +18,40 @@ #include <asm/percpu.h> #include <asm/desc.h> -#include <linux/random.h> #include <linux/sched.h> -/* - * 24 byte read-only segment initializer for stack canary. Linker - * can't handle the address bit shifting. Address will be set in - * head_32 for boot CPU and setup_per_cpu_areas() for others. - */ -#define GDT_STACK_CANARY_INIT \ - [GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x18), +DECLARE_PER_CPU_CACHE_HOT(unsigned long, __stack_chk_guard); /* * Initialize the stackprotector canary value. * - * NOTE: this must only be called from functions that never return, + * NOTE: this must only be called from functions that never return * and it must always be inlined. + * + * In addition, it should be called from a compilation unit for which + * stack protector is disabled. Alternatively, the caller should not end + * with a function call which gets tail-call optimized as that would + * lead to checking a modified canary value. */ static __always_inline void boot_init_stack_canary(void) { - u64 canary; - u64 tsc; - -#ifdef CONFIG_X86_64 - BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40); -#endif - /* - * We both use the random pool and the current TSC as a source - * of randomness. The TSC only matters for very early init, - * there it already has some randomness on most systems. Later - * on during the bootup the random pool has true entropy too. 
- */ - get_random_bytes(&canary, sizeof(canary)); - tsc = rdtsc(); - canary += tsc + (tsc << 32UL); - canary &= CANARY_MASK; + unsigned long canary = get_random_canary(); current->stack_canary = canary; -#ifdef CONFIG_X86_64 - this_cpu_write(irq_stack_union.stack_canary, canary); -#else - this_cpu_write(stack_canary.canary, canary); -#endif + this_cpu_write(__stack_chk_guard, canary); } -static inline void setup_stack_canary_segment(int cpu) +static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle) { -#ifdef CONFIG_X86_32 - unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu); - struct desc_struct *gdt_table = get_cpu_gdt_rw(cpu); - struct desc_struct desc; - - desc = gdt_table[GDT_ENTRY_STACK_CANARY]; - set_desc_base(&desc, canary); - write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S); -#endif -} - -static inline void load_stack_canary_segment(void) -{ -#ifdef CONFIG_X86_32 - asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory"); -#endif + per_cpu(__stack_chk_guard, cpu) = idle->stack_canary; } #else /* STACKPROTECTOR */ -#define GDT_STACK_CANARY_INIT - /* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */ -static inline void setup_stack_canary_segment(int cpu) +static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle) { } -static inline void load_stack_canary_segment(void) -{ -#ifdef CONFIG_X86_32 - asm volatile ("mov %0, %%gs" : : "r" (0)); -#endif -} - #endif /* STACKPROTECTOR */ #endif /* _ASM_STACKPROTECTOR_H */ diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index f335aad404a4..3881b5333eb8 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -9,6 +9,8 @@ #include <linux/uaccess.h> #include <linux/ptrace.h> + +#include <asm/cpu_entry_area.h> #include <asm/switch_to.h> enum stack_type { @@ -33,6 +35,18 @@ bool in_entry_stack(unsigned long *stack, struct stack_info *info); int get_stack_info(unsigned long *stack, struct task_struct *task, struct stack_info *info, unsigned long *visit_mask); +bool get_stack_info_noinstr(unsigned long *stack, struct task_struct *task, + struct stack_info *info); + +static __always_inline +bool get_stack_guard_info(unsigned long *stack, struct stack_info *info) +{ + /* make sure it's not in the stack proper */ + if (get_stack_info_noinstr(stack, current, info)) + return false; + /* but if it is in the page below it, we hit a guard */ + return get_stack_info_noinstr((void *)stack + PAGE_SIZE, current, info); +} const char *stack_type_name(enum stack_type type); @@ -76,7 +90,7 @@ static inline unsigned long * get_stack_pointer(struct task_struct *task, struct pt_regs *regs) { if (regs) - return (unsigned long *)kernel_stack_pointer(regs); + return (unsigned long *)regs->sp; if (task == current) return __builtin_frame_address(0); @@ -84,9 +98,6 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs) return (unsigned long *)task->thread.sp; } -void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, char *log_lvl); - /* The form of the top of the frame on the stack */ struct stack_frame { struct stack_frame *next_frame; @@ -98,19 +109,6 @@ struct stack_frame_ia32 { u32 return_address; }; -static inline unsigned long caller_frame_pointer(void) -{ - struct stack_frame *frame; - - frame = __builtin_frame_address(0); - -#ifdef CONFIG_FRAME_POINTER - frame = frame->next_frame; -#endif - - return (unsigned long)frame; -} - void 
show_opcodes(struct pt_regs *regs, const char *loglvl);
 void show_ip(struct pt_regs *regs, const char *loglvl);
 #endif /* _ASM_X86_STACKTRACE_H */
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
new file mode 100644
index 000000000000..4cd725a8fe91
--- /dev/null
+++ b/arch/x86/include/asm/static_call.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_STATIC_CALL_H
+#define _ASM_STATIC_CALL_H
+
+#include <asm/text-patching.h>
+
+/*
+ * For CONFIG_HAVE_STATIC_CALL_INLINE, this is a temporary trampoline which
+ * uses the current value of the key->func pointer to do an indirect jump to
+ * the function. This trampoline is only used during boot, before the call
+ * sites get patched by static_call_update(). The name of this trampoline has
+ * a magical aspect: objtool uses it to find static call sites so it can create
+ * the .static_call_sites section.
+ *
+ * For CONFIG_HAVE_STATIC_CALL, this is a permanent trampoline which
+ * does a direct jump to the function. The direct jump gets patched by
+ * static_call_update().
+ *
+ * Having the trampoline in a special section forces GCC to emit a JMP.d32 when
+ * it does tail-call optimization on the call, since you cannot compute the
+ * relative displacement across sections.
+ */
+
+/*
+ * The trampoline is 8 bytes and of the general form:
+ *
+ * jmp.d32 \func
+ * ud1 %esp, %ecx
+ *
+ * That trailing #UD both provides a speculation stop and serves as a unique
+ * 3-byte signature identifying static call trampolines. Also see tramp_ud[]
+ * and __static_call_fixup().
+ */
+#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \
+ asm(".pushsection .static_call.text, \"ax\" \n" \
+ ".align 4 \n" \
+ ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
+ STATIC_CALL_TRAMP_STR(name) ": \n" \
+ ANNOTATE_NOENDBR " \n" \
+ insns " \n" \
+ ".byte 0x0f, 0xb9, 0xcc \n" \
+ ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
+ ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
+ ".popsection \n")
+
+#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
+
+#ifdef CONFIG_MITIGATION_RETHUNK
+#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
+#else
+#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
+#endif
+
+#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \
+ ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0)
+
+#define ARCH_ADD_TRAMP_KEY(name) \
+ asm(".pushsection .static_call_tramp_key, \"a\" \n" \
+ ".long " STATIC_CALL_TRAMP_STR(name) " - . \n" \
+ ".long " STATIC_CALL_KEY_STR(name) " - .
\n" \ + ".popsection \n") + +extern bool __static_call_fixup(void *tramp, u8 op, void *dest); + +extern void __static_call_update_early(void *tramp, void *func); + +#define static_call_update_early(name, _func) \ +({ \ + typeof(&STATIC_CALL_TRAMP(name)) __F = (_func); \ + if (static_call_initialized) { \ + __static_call_update(&STATIC_CALL_KEY(name), \ + STATIC_CALL_TRAMP_ADDR(name), __F);\ + } else { \ + WRITE_ONCE(STATIC_CALL_KEY(name).func, _func); \ + __static_call_update_early(STATIC_CALL_TRAMP_ADDR(name),\ + __F); \ + } \ +}) + +#endif /* _ASM_STATIC_CALL_H */ diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h index c3c2c1914d65..9cb5aae7fba9 100644 --- a/arch/x86/include/asm/string.h +++ b/arch/x86/include/asm/string.h @@ -1,6 +1,32 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_STRING_H +#define _ASM_X86_STRING_H + #ifdef CONFIG_X86_32 # include <asm/string_32.h> #else # include <asm/string_64.h> #endif + +static __always_inline void *__inline_memcpy(void *to, const void *from, size_t len) +{ + void *ret = to; + + asm volatile("rep movsb" + : "+D" (to), "+S" (from), "+c" (len) + : : "memory"); + return ret; +} + +static __always_inline void *__inline_memset(void *s, int v, size_t n) +{ + void *ret = s; + + asm volatile("rep stosb" + : "+D" (s), "+c" (n) + : "a" ((uint8_t)v) + : "memory"); + return ret; +} + +#endif /* _ASM_X86_STRING_H */ diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index 55d392c6bd29..e9cce169bb4c 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h @@ -33,11 +33,11 @@ extern size_t strlen(const char *s); static __always_inline void *__memcpy(void *to, const void *from, size_t n) { int d0, d1, d2; - asm volatile("rep ; movsl\n\t" + asm volatile("rep movsl\n\t" "movl %4,%%ecx\n\t" "andl $3,%%ecx\n\t" "jz 1f\n\t" - "rep ; movsb\n\t" + "rep movsb\n\t" "1:" : "=&c" (d0), "=&D" (d1), "=&S" (d2) : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from) @@ -89,7 +89,7 @@ static __always_inline void *__constant_memcpy(void *to, const void *from, if (n >= 5 * 4) { /* large block: use rep prefix */ int ecx; - asm volatile("rep ; movsl" + asm volatile("rep movsl" : "=&c" (ecx), "=&D" (edi), "=&S" (esi) : "0" (n / 4), "1" (edi), "2" (esi) : "memory" @@ -146,49 +146,9 @@ static __always_inline void *__constant_memcpy(void *to, const void *from, extern void *memcpy(void *, const void *, size_t); #ifndef CONFIG_FORTIFY_SOURCE -#ifdef CONFIG_X86_USE_3DNOW -#include <asm/mmx.h> - -/* - * This CPU favours 3DNow strongly (eg AMD Athlon) - */ - -static inline void *__constant_memcpy3d(void *to, const void *from, size_t len) -{ - if (len < 512) - return __constant_memcpy(to, from, len); - return _mmx_memcpy(to, from, len); -} - -static inline void *__memcpy3d(void *to, const void *from, size_t len) -{ - if (len < 512) - return __memcpy(to, from, len); - return _mmx_memcpy(to, from, len); -} - -#define memcpy(t, f, n) \ - (__builtin_constant_p((n)) \ - ? __constant_memcpy3d((t), (f), (n)) \ - : __memcpy3d((t), (f), (n))) - -#else - -/* - * No 3D Now! - */ - -#if (__GNUC__ >= 4) #define memcpy(t, f, n) __builtin_memcpy(t, f, n) -#else -#define memcpy(t, f, n) \ - (__builtin_constant_p((n)) \ - ? 
__constant_memcpy((t), (f), (n)) \ - : __memcpy((t), (f), (n))) -#endif -#endif #endif /* !CONFIG_FORTIFY_SOURCE */ #define __HAVE_ARCH_MEMMOVE @@ -205,8 +165,7 @@ extern void *memchr(const void *cs, int c, size_t count); static inline void *__memset_generic(void *s, char c, size_t count) { int d0, d1; - asm volatile("rep\n\t" - "stosb" + asm volatile("rep stosb" : "=&c" (d0), "=&D" (d1) : "a" (c), "1" (s), "0" (count) : "memory"); @@ -216,29 +175,6 @@ static inline void *__memset_generic(void *s, char c, size_t count) /* we might want to write optimized versions of these later */ #define __constant_count_memset(s, c, count) __memset_generic((s), (c), (count)) -/* - * memset(x, 0, y) is a reasonably common thing to do, so we want to fill - * things 32 bits at a time even when we don't know the size of the - * area at compile-time.. - */ -static __always_inline -void *__constant_c_memset(void *s, unsigned long c, size_t count) -{ - int d0, d1; - asm volatile("rep ; stosl\n\t" - "testb $2,%b3\n\t" - "je 1f\n\t" - "stosw\n" - "1:\ttestb $1,%b3\n\t" - "je 2f\n\t" - "stosb\n" - "2:" - : "=&c" (d0), "=&D" (d1) - : "a" (c), "q" (count), "0" (count/4), "1" ((long)s) - : "memory"); - return s; -} - /* Added by Gertjan van Wingerde to make minix and sysv module work */ #define __HAVE_ARCH_STRNLEN extern size_t strnlen(const char *s, size_t count); @@ -247,72 +183,6 @@ extern size_t strnlen(const char *s, size_t count); #define __HAVE_ARCH_STRSTR extern char *strstr(const char *cs, const char *ct); -/* - * This looks horribly ugly, but the compiler can optimize it totally, - * as we by now know that both pattern and count is constant.. - */ -static __always_inline -void *__constant_c_and_count_memset(void *s, unsigned long pattern, - size_t count) -{ - switch (count) { - case 0: - return s; - case 1: - *(unsigned char *)s = pattern & 0xff; - return s; - case 2: - *(unsigned short *)s = pattern & 0xffff; - return s; - case 3: - *(unsigned short *)s = pattern & 0xffff; - *((unsigned char *)s + 2) = pattern & 0xff; - return s; - case 4: - *(unsigned long *)s = pattern; - return s; - } - -#define COMMON(x) \ - asm volatile("rep ; stosl" \ - x \ - : "=&c" (d0), "=&D" (d1) \ - : "a" (eax), "0" (count/4), "1" ((long)s) \ - : "memory") - - { - int d0, d1; -#if __GNUC__ == 4 && __GNUC_MINOR__ == 0 - /* Workaround for broken gcc 4.0 */ - register unsigned long eax asm("%eax") = pattern; -#else - unsigned long eax = pattern; -#endif - - switch (count % 4) { - case 0: - COMMON(""); - return s; - case 1: - COMMON("\n\tstosb"); - return s; - case 2: - COMMON("\n\tstosw"); - return s; - default: - COMMON("\n\tstosw\n\tstosb"); - return s; - } - } - -#undef COMMON -} - -#define __constant_c_x_memset(s, c, count) \ - (__builtin_constant_p(count) \ - ? __constant_c_and_count_memset((s), (c), (count)) \ - : __constant_c_memset((s), (c), (count))) - #define __memset(s, c, count) \ (__builtin_constant_p(count) \ ? __constant_count_memset((s), (c), (count)) \ @@ -321,23 +191,14 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern, #define __HAVE_ARCH_MEMSET extern void *memset(void *, int, size_t); #ifndef CONFIG_FORTIFY_SOURCE -#if (__GNUC__ >= 4) #define memset(s, c, count) __builtin_memset(s, c, count) -#else -#define memset(s, c, count) \ - (__builtin_constant_p(c) \ - ? 
__constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \ - (count)) \ - : __memset((s), (c), (count))) -#endif #endif /* !CONFIG_FORTIFY_SOURCE */ #define __HAVE_ARCH_MEMSET16 static inline void *memset16(uint16_t *s, uint16_t v, size_t n) { int d0, d1; - asm volatile("rep\n\t" - "stosw" + asm volatile("rep stosw" : "=&c" (d0), "=&D" (d1) : "a" (v), "1" (s), "0" (n) : "memory"); @@ -348,8 +209,7 @@ static inline void *memset16(uint16_t *s, uint16_t v, size_t n) static inline void *memset32(uint32_t *s, uint32_t v, size_t n) { int d0, d1; - asm volatile("rep\n\t" - "stosl" + asm volatile("rep stosl" : "=&c" (d0), "=&D" (d1) : "a" (v), "1" (s), "0" (n) : "memory"); diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 4e4194e21a09..4635616863f5 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -10,68 +10,68 @@ /* Even with __builtin_ the compiler may decide to use the out of line function. */ +#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY) +#include <linux/kmsan_string.h> +#endif + #define __HAVE_ARCH_MEMCPY 1 extern void *memcpy(void *to, const void *from, size_t len); extern void *__memcpy(void *to, const void *from, size_t len); -#ifndef CONFIG_FORTIFY_SOURCE -#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4 -#define memcpy(dst, src, len) \ -({ \ - size_t __len = (len); \ - void *__ret; \ - if (__builtin_constant_p(len) && __len >= 64) \ - __ret = __memcpy((dst), (src), __len); \ - else \ - __ret = __builtin_memcpy((dst), (src), __len); \ - __ret; \ -}) -#endif -#endif /* !CONFIG_FORTIFY_SOURCE */ - #define __HAVE_ARCH_MEMSET void *memset(void *s, int c, size_t n); void *__memset(void *s, int c, size_t n); +KCFI_REFERENCE(__memset); +/* + * KMSAN needs to instrument as much code as possible. Use C versions of + * memsetXX() from lib/string.c under KMSAN. + */ +#if !defined(CONFIG_KMSAN) #define __HAVE_ARCH_MEMSET16 static inline void *memset16(uint16_t *s, uint16_t v, size_t n) { - long d0, d1; - asm volatile("rep\n\t" - "stosw" - : "=&c" (d0), "=&D" (d1) - : "a" (v), "1" (s), "0" (n) - : "memory"); - return s; + const auto s0 = s; + asm volatile ( + "rep stosw" + : "+D" (s), "+c" (n) + : "a" (v) + : "memory" + ); + return s0; } #define __HAVE_ARCH_MEMSET32 static inline void *memset32(uint32_t *s, uint32_t v, size_t n) { - long d0, d1; - asm volatile("rep\n\t" - "stosl" - : "=&c" (d0), "=&D" (d1) - : "a" (v), "1" (s), "0" (n) - : "memory"); - return s; + const auto s0 = s; + asm volatile ( + "rep stosl" + : "+D" (s), "+c" (n) + : "a" (v) + : "memory" + ); + return s0; } #define __HAVE_ARCH_MEMSET64 static inline void *memset64(uint64_t *s, uint64_t v, size_t n) { - long d0, d1; - asm volatile("rep\n\t" - "stosq" - : "=&c" (d0), "=&D" (d1) - : "a" (v), "1" (s), "0" (n) - : "memory"); - return s; + const auto s0 = s; + asm volatile ( + "rep stosq" + : "+D" (s), "+c" (n) + : "a" (v) + : "memory" + ); + return s0; } +#endif #define __HAVE_ARCH_MEMMOVE void *memmove(void *dest, const void *src, size_t count); void *__memmove(void *dest, const void *src, size_t count); +KCFI_REFERENCE(__memmove); int memcmp(const void *cs, const void *ct, size_t count); size_t strlen(const char *s); @@ -79,56 +79,6 @@ char *strcpy(char *dest, const char *src); char *strcat(char *dest, const char *src); int strcmp(const char *cs, const char *ct); -#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) - -/* - * For files that not instrumented (e.g. 
mm/slub.c) we
- * should use not instrumented version of mem* functions.
- */
-
-#undef memcpy
-#define memcpy(dst, src, len) __memcpy(dst, src, len)
-#define memmove(dst, src, len) __memmove(dst, src, len)
-#define memset(s, c, n) __memset(s, c, n)
-
-#ifndef __NO_FORTIFY
-#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
-#endif
-
-#endif
-
-#define __HAVE_ARCH_MEMCPY_MCSAFE 1
-__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src,
- size_t cnt);
-DECLARE_STATIC_KEY_FALSE(mcsafe_key);
-
-/**
- * memcpy_mcsafe - copy memory with indication if a machine check happened
- *
- * @dst: destination address
- * @src: source address
- * @cnt: number of bytes to copy
- *
- * Low level memory copy function that catches machine checks
- * We only call into the "safe" function on systems that can
- * actually do machine check recovery. Everyone else can just
- * use memcpy().
- *
- * Return 0 for success, or number of bytes not copied if there was an
- * exception.
- */
-static __always_inline __must_check unsigned long
-memcpy_mcsafe(void *dst, const void *src, size_t cnt)
-{
-#ifdef CONFIG_X86_MCE
- if (static_branch_unlikely(&mcsafe_key))
- return __memcpy_mcsafe(dst, src, cnt);
- else
-#endif
- memcpy(dst, src, cnt);
- return 0;
-}
-
 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
 #define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
 void __memcpy_flushcache(void *dst, const void *src, size_t cnt);
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index fdbd9d7b7bca..e8e5aab06255 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -9,19 +9,12 @@
 #include <asm/desc.h>
 #include <asm/fpu/api.h>
+#include <asm/msr.h>
 
 /* image of the saved processor state */
 struct saved_context {
- /*
- * On x86_32, all segment registers, with the possible exception of
- * gs, are saved at kernel entry in pt_regs.
- */
-#ifdef CONFIG_X86_32_LAZY_GS
- u16 gs;
-#endif
 unsigned long cr0, cr2, cr3, cr4;
 u64 misc_enable;
- bool misc_enable_saved;
 struct saved_msrs saved_msrs;
 struct desc_ptr gdt_desc;
 struct desc_ptr idt;
@@ -30,6 +23,12 @@ struct saved_context {
 unsigned long tr;
 unsigned long safety;
 unsigned long return_address;
+ /*
+ * On x86_32, all segment registers except gs are saved at kernel
+ * entry in pt_regs.
+ */
+ u16 gs;
+ bool misc_enable_saved;
 } __attribute__((packed));
 
 /* routines for saving/restoring kernel state */
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index a7af9f53c0cb..b512f9665f78 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -9,14 +9,19 @@
 #include <asm/desc.h>
 #include <asm/fpu/api.h>
+#include <asm/msr.h>
 
 /*
 * Image of the saved processor state, used by the low level ACPI suspend to
 * RAM code and by the low level hibernation code.
 *
- * If you modify it, fix arch/x86/kernel/acpi/wakeup_64.S and make sure that
- * __save/__restore_processor_state(), defined in arch/x86/kernel/suspend_64.c,
- * still work as required.
+ * If you modify it, check how it is used in arch/x86/kernel/acpi/wakeup_64.S
+ * and make sure that __save/__restore_processor_state(), defined in
+ * arch/x86/power/cpu.c, still work as required.
+ *
+ * Because the structure is packed, make sure to avoid unaligned members,
+ * both for optimisation purposes and because tools like kmemleak only
+ * search for pointers that are aligned.
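+ *
+ * That is why, for example, the hunk below moves the one-byte
+ * misc_enable_saved flag to the very end of the structure: left between
+ * the u64 and unsigned long members, it would push every later field to
+ * a misaligned offset.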
*/ struct saved_context { struct pt_regs regs; @@ -34,9 +39,8 @@ struct saved_context { */ unsigned long kernelmode_gs_base, usermode_gs_base, fs_base; - unsigned long cr0, cr2, cr3, cr4, cr8; + unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; - bool misc_enable_saved; struct saved_msrs saved_msrs; unsigned long efer; u16 gdt_pad; /* Unused */ @@ -48,6 +52,7 @@ struct saved_context { unsigned long tr; unsigned long safety; unsigned long return_address; + bool misc_enable_saved; } __attribute__((packed)); #define loaddebug(thread,register) \ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index dec9c1e84c78..56aa99503dc4 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -3,10 +3,56 @@ #define __SVM_H #include <uapi/asm/svm.h> - +#include <uapi/asm/kvm.h> + +#include <hyperv/hvhdk.h> + +/* + * 32-bit intercept words in the VMCB Control Area, starting + * at Byte offset 000h. + */ + +enum intercept_words { + INTERCEPT_CR = 0, + INTERCEPT_DR, + INTERCEPT_EXCEPTION, + INTERCEPT_WORD3, + INTERCEPT_WORD4, + INTERCEPT_WORD5, + MAX_INTERCEPT, +}; enum { - INTERCEPT_INTR, + /* Byte offset 000h (word 0) */ + INTERCEPT_CR0_READ = 0, + INTERCEPT_CR3_READ = 3, + INTERCEPT_CR4_READ = 4, + INTERCEPT_CR8_READ = 8, + INTERCEPT_CR0_WRITE = 16, + INTERCEPT_CR3_WRITE = 16 + 3, + INTERCEPT_CR4_WRITE = 16 + 4, + INTERCEPT_CR8_WRITE = 16 + 8, + /* Byte offset 004h (word 1) */ + INTERCEPT_DR0_READ = 32, + INTERCEPT_DR1_READ, + INTERCEPT_DR2_READ, + INTERCEPT_DR3_READ, + INTERCEPT_DR4_READ, + INTERCEPT_DR5_READ, + INTERCEPT_DR6_READ, + INTERCEPT_DR7_READ, + INTERCEPT_DR0_WRITE = 48, + INTERCEPT_DR1_WRITE, + INTERCEPT_DR2_WRITE, + INTERCEPT_DR3_WRITE, + INTERCEPT_DR4_WRITE, + INTERCEPT_DR5_WRITE, + INTERCEPT_DR6_WRITE, + INTERCEPT_DR7_WRITE, + /* Byte offset 008h (word 2) */ + INTERCEPT_EXCEPTION_OFFSET = 64, + /* Byte offset 00Ch (word 3) */ + INTERCEPT_INTR = 96, INTERCEPT_NMI, INTERCEPT_SMI, INTERCEPT_INIT, @@ -38,7 +84,8 @@ enum { INTERCEPT_TASK_SWITCH, INTERCEPT_FERR_FREEZE, INTERCEPT_SHUTDOWN, - INTERCEPT_VMRUN, + /* Byte offset 010h (word 4) */ + INTERCEPT_VMRUN = 128, INTERCEPT_VMMCALL, INTERCEPT_VMLOAD, INTERCEPT_VMSAVE, @@ -52,15 +99,31 @@ enum { INTERCEPT_MWAIT, INTERCEPT_MWAIT_COND, INTERCEPT_XSETBV, + INTERCEPT_RDPRU, + TRAP_EFER_WRITE, + TRAP_CR0_WRITE, + TRAP_CR1_WRITE, + TRAP_CR2_WRITE, + TRAP_CR3_WRITE, + TRAP_CR4_WRITE, + TRAP_CR5_WRITE, + TRAP_CR6_WRITE, + TRAP_CR7_WRITE, + TRAP_CR8_WRITE, + /* Byte offset 014h (word 5) */ + INTERCEPT_INVLPGB = 160, + INTERCEPT_INVLPGB_ILLEGAL, + INTERCEPT_INVPCID, + INTERCEPT_MCOMMIT, + INTERCEPT_TLBSYNC, + INTERCEPT_BUSLOCK, + INTERCEPT_IDLE_HLT = 166, }; struct __attribute__ ((__packed__)) vmcb_control_area { - u32 intercept_cr; - u32 intercept_dr; - u32 intercept_exceptions; - u64 intercept; - u8 reserved_1[40]; + u32 intercepts[MAX_INTERCEPT]; + u32 reserved_1[15 - MAX_INTERCEPT]; u16 pause_filter_thresh; u16 pause_filter_count; u64 iopm_base_pa; @@ -81,7 +144,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u32 exit_int_info_err; u64 nested_ctl; u64 avic_vapic_bar; - u8 reserved_4[8]; + u64 ghcb_gpa; u32 event_inj; u32 event_inj_err; u64 nested_cr3; @@ -95,7 +158,22 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u8 reserved_6[8]; /* Offset 0xe8 */ u64 avic_logical_id; /* Offset 0xf0 */ u64 avic_physical_id; /* Offset 0xf8 */ - u8 reserved_7[768]; + u8 reserved_7[8]; + u64 vmsa_pa; /* Used for an SEV-ES guest */ + u8 reserved_8[16]; + u16 bus_lock_counter; /* Offset 0x120 */ + u8 
reserved_9[22]; + u64 allowed_sev_features; /* Offset 0x138 */ + u64 guest_sev_features; /* Offset 0x140 */ + u8 reserved_10[664]; + /* + * Offset 0x3e0, 32 bytes reserved + * for use by hypervisor/software. + */ + union { + struct hv_vmcb_enlightenments hv_enlightenments; + u8 reserved_sw[32]; + }; }; @@ -112,25 +190,40 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_GIF_SHIFT 9 #define V_GIF_MASK (1 << V_GIF_SHIFT) +#define V_NMI_PENDING_SHIFT 11 +#define V_NMI_PENDING_MASK (1 << V_NMI_PENDING_SHIFT) + +#define V_NMI_BLOCKING_SHIFT 12 +#define V_NMI_BLOCKING_MASK (1 << V_NMI_BLOCKING_SHIFT) + #define V_INTR_PRIO_SHIFT 16 #define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) #define V_IGN_TPR_SHIFT 20 #define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) +#define V_IRQ_INJECTION_BITS_MASK (V_IRQ_MASK | V_INTR_PRIO_MASK | V_IGN_TPR_MASK) + #define V_INTR_MASKING_SHIFT 24 #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) #define V_GIF_ENABLE_SHIFT 25 #define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT) +#define V_NMI_ENABLE_SHIFT 26 +#define V_NMI_ENABLE_MASK (1 << V_NMI_ENABLE_SHIFT) + #define AVIC_ENABLE_SHIFT 31 #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT) +#define X2APIC_MODE_SHIFT 30 +#define X2APIC_MODE_MASK (1 << X2APIC_MODE_SHIFT) + #define LBR_CTL_ENABLE_MASK BIT_ULL(0) #define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1) -#define SVM_INTERRUPT_SHADOW_MASK 1 +#define SVM_INTERRUPT_SHADOW_MASK BIT_ULL(0) +#define SVM_GUEST_INTERRUPT_MASK BIT_ULL(1) #define SVM_IOIO_STR_SHIFT 2 #define SVM_IOIO_REP_SHIFT 3 @@ -143,21 +236,85 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) #define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) -#define SVM_VM_CR_VALID_MASK 0x001fULL -#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL -#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL - #define SVM_NESTED_CTL_NP_ENABLE BIT(0) #define SVM_NESTED_CTL_SEV_ENABLE BIT(1) +#define SVM_NESTED_CTL_SEV_ES_ENABLE BIT(2) + + +#define SVM_TSC_RATIO_RSVD 0xffffff0000000000ULL +#define SVM_TSC_RATIO_MIN 0x0000000000000001ULL +#define SVM_TSC_RATIO_MAX 0x000000ffffffffffULL +#define SVM_TSC_RATIO_DEFAULT 0x0100000000ULL + + +/* AVIC */ +#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFFULL) +#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31 +#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) + +/* + * GA_LOG_INTR is a synthetic flag that's never propagated to hardware-visible + * tables. GA_LOG_INTR is set if the vCPU needs device posted IRQs to generate + * GA log interrupts to wake the vCPU (because it's blocking or about to block). 
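+ *
+ * (The GA log here is the AMD IOMMU's guest virtual APIC (GA) log, which
+ * accumulates device-posted interrupts that arrive while the target vCPU
+ * is not running.)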
+ */ +#define AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR BIT_ULL(61) + +#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0) +#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK GENMASK_ULL(51, 12) +#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62) +#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63) +#define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK (0xFFULL) + +#define AVIC_DOORBELL_PHYSICAL_ID_MASK GENMASK_ULL(11, 0) + +#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1 +#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0 +#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF + +enum avic_ipi_failure_cause { + AVIC_IPI_FAILURE_INVALID_INT_TYPE, + AVIC_IPI_FAILURE_TARGET_NOT_RUNNING, + AVIC_IPI_FAILURE_INVALID_TARGET, + AVIC_IPI_FAILURE_INVALID_BACKING_PAGE, + AVIC_IPI_FAILURE_INVALID_IPI_VECTOR, +}; -struct __attribute__ ((__packed__)) vmcb_seg { +#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(11, 0) + +/* + * For AVIC, the max index allowed for physical APIC ID table is 0xfe (254), as + * 0xff is a broadcast to all CPUs, i.e. can't be targeted individually. + */ +#define AVIC_MAX_PHYSICAL_ID 0XFEULL + +/* + * For x2AVIC, the max index allowed for physical APIC ID table is 0x1ff (511). + * With X86_FEATURE_X2AVIC_EXT, the max index is increased to 0xfff (4095). + */ +#define X2AVIC_MAX_PHYSICAL_ID 0x1FFUL +#define X2AVIC_4K_MAX_PHYSICAL_ID 0xFFFUL + +static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID); +static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID); +static_assert((X2AVIC_4K_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_4K_MAX_PHYSICAL_ID); + +#define SVM_SEV_FEAT_SNP_ACTIVE BIT(0) +#define SVM_SEV_FEAT_RESTRICTED_INJECTION BIT(3) +#define SVM_SEV_FEAT_ALTERNATE_INJECTION BIT(4) +#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5) +#define SVM_SEV_FEAT_SECURE_TSC BIT(9) + +#define VMCB_ALLOWED_SEV_FEATURES_VALID BIT_ULL(63) + +struct vmcb_seg { u16 selector; u16 attrib; u32 limit; u64 base; -}; +} __packed; -struct __attribute__ ((__packed__)) vmcb_save_area { +/* Save area definition for legacy and SEV-MEM guests */ +struct vmcb_save_area { struct vmcb_seg es; struct vmcb_seg cs; struct vmcb_seg ss; @@ -168,11 +325,13 @@ struct __attribute__ ((__packed__)) vmcb_save_area { struct vmcb_seg ldtr; struct vmcb_seg idtr; struct vmcb_seg tr; - u8 reserved_1[43]; + /* Reserved fields are named following their struct offset */ + u8 reserved_0xa0[42]; + u8 vmpl; u8 cpl; - u8 reserved_2[4]; + u8 reserved_0xcc[4]; u64 efer; - u8 reserved_3[112]; + u8 reserved_0xd8[112]; u64 cr4; u64 cr3; u64 cr0; @@ -180,9 +339,11 @@ struct __attribute__ ((__packed__)) vmcb_save_area { u64 dr6; u64 rflags; u64 rip; - u8 reserved_4[88]; + u8 reserved_0x180[88]; u64 rsp; - u8 reserved_5[24]; + u64 s_cet; + u64 ssp; + u64 isst_addr; u64 rax; u64 star; u64 lstar; @@ -193,24 +354,251 @@ struct __attribute__ ((__packed__)) vmcb_save_area { u64 sysenter_esp; u64 sysenter_eip; u64 cr2; - u8 reserved_6[32]; + u8 reserved_0x248[32]; u64 g_pat; u64 dbgctl; u64 br_from; u64 br_to; u64 last_excp_from; u64 last_excp_to; -}; + u8 reserved_0x298[72]; + u64 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */ +} __packed; -struct __attribute__ ((__packed__)) vmcb { +/* Save area definition for SEV-ES and SEV-SNP guests */ +struct sev_es_save_area { + struct vmcb_seg es; + struct vmcb_seg cs; + struct vmcb_seg ss; + struct vmcb_seg ds; + struct vmcb_seg fs; + struct vmcb_seg gs; + struct vmcb_seg gdtr; + struct vmcb_seg ldtr; + struct vmcb_seg 
idtr; + struct vmcb_seg tr; + u64 pl0_ssp; + u64 pl1_ssp; + u64 pl2_ssp; + u64 pl3_ssp; + u64 u_cet; + u8 reserved_0xc8[2]; + u8 vmpl; + u8 cpl; + u8 reserved_0xcc[4]; + u64 efer; + u8 reserved_0xd8[104]; + u64 xss; + u64 cr4; + u64 cr3; + u64 cr0; + u64 dr7; + u64 dr6; + u64 rflags; + u64 rip; + u64 dr0; + u64 dr1; + u64 dr2; + u64 dr3; + u64 dr0_addr_mask; + u64 dr1_addr_mask; + u64 dr2_addr_mask; + u64 dr3_addr_mask; + u8 reserved_0x1c0[24]; + u64 rsp; + u64 s_cet; + u64 ssp; + u64 isst_addr; + u64 rax; + u64 star; + u64 lstar; + u64 cstar; + u64 sfmask; + u64 kernel_gs_base; + u64 sysenter_cs; + u64 sysenter_esp; + u64 sysenter_eip; + u64 cr2; + u8 reserved_0x248[32]; + u64 g_pat; + u64 dbgctl; + u64 br_from; + u64 br_to; + u64 last_excp_from; + u64 last_excp_to; + u8 reserved_0x298[80]; + u32 pkru; + u32 tsc_aux; + u64 tsc_scale; + u64 tsc_offset; + u8 reserved_0x300[8]; + u64 rcx; + u64 rdx; + u64 rbx; + u64 reserved_0x320; /* rsp already available at 0x01d8 */ + u64 rbp; + u64 rsi; + u64 rdi; + u64 r8; + u64 r9; + u64 r10; + u64 r11; + u64 r12; + u64 r13; + u64 r14; + u64 r15; + u8 reserved_0x380[16]; + u64 guest_exit_info_1; + u64 guest_exit_info_2; + u64 guest_exit_int_info; + u64 guest_nrip; + u64 sev_features; + u64 vintr_ctrl; + u64 guest_exit_code; + u64 virtual_tom; + u64 tlb_id; + u64 pcpu_id; + u64 event_inj; + u64 xcr0; + u8 reserved_0x3f0[16]; + + /* Floating point area */ + u64 x87_dp; + u32 mxcsr; + u16 x87_ftw; + u16 x87_fsw; + u16 x87_fcw; + u16 x87_fop; + u16 x87_ds; + u16 x87_cs; + u64 x87_rip; + u8 fpreg_x87[80]; + u8 fpreg_xmm[256]; + u8 fpreg_ymm[256]; +} __packed; + +struct ghcb_save_area { + u8 reserved_0x0[203]; + u8 cpl; + u8 reserved_0xcc[116]; + u64 xss; + u8 reserved_0x148[24]; + u64 dr7; + u8 reserved_0x168[16]; + u64 rip; + u8 reserved_0x180[88]; + u64 rsp; + u8 reserved_0x1e0[24]; + u64 rax; + u8 reserved_0x200[264]; + u64 rcx; + u64 rdx; + u64 rbx; + u8 reserved_0x320[8]; + u64 rbp; + u64 rsi; + u64 rdi; + u64 r8; + u64 r9; + u64 r10; + u64 r11; + u64 r12; + u64 r13; + u64 r14; + u64 r15; + u8 reserved_0x380[16]; + u64 sw_exit_code; + u64 sw_exit_info_1; + u64 sw_exit_info_2; + u64 sw_scratch; + u8 reserved_0x3b0[56]; + u64 xcr0; + u8 valid_bitmap[16]; + u64 x87_state_gpa; +} __packed; + +#define GHCB_SHARED_BUF_SIZE 2032 + +struct ghcb { + struct ghcb_save_area save; + u8 reserved_save[2048 - sizeof(struct ghcb_save_area)]; + + u8 shared_buffer[GHCB_SHARED_BUF_SIZE]; + + u8 reserved_0xff0[10]; + u16 protocol_version; /* negotiated SEV-ES/GHCB protocol version */ + u32 ghcb_usage; +} __packed; + +struct vmcb { struct vmcb_control_area control; - struct vmcb_save_area save; -}; + union { + struct vmcb_save_area save; + + /* + * For SEV-ES VMs, the save area in the VMCB is used only to + * save/load host state. Guest state resides in a separate + * page, the aptly named VM Save Area (VMSA), that is encrypted + * with the guest's private key. 
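+ *
+ * Register state that such a guest wants to expose is instead exchanged
+ * with the hypervisor through the GHCB defined above.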
+ */ + struct sev_es_save_area host_sev_es_save; + }; +} __packed; + +#define EXPECTED_VMCB_SAVE_AREA_SIZE 744 +#define EXPECTED_GHCB_SAVE_AREA_SIZE 1032 +#define EXPECTED_SEV_ES_SAVE_AREA_SIZE 1648 +#define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024 +#define EXPECTED_GHCB_SIZE PAGE_SIZE + +#define BUILD_BUG_RESERVED_OFFSET(x, y) \ + ASSERT_STRUCT_OFFSET(struct x, reserved ## _ ## y, y) + +static inline void __unused_size_checks(void) +{ + BUILD_BUG_ON(sizeof(struct vmcb_save_area) != EXPECTED_VMCB_SAVE_AREA_SIZE); + BUILD_BUG_ON(sizeof(struct ghcb_save_area) != EXPECTED_GHCB_SAVE_AREA_SIZE); + BUILD_BUG_ON(sizeof(struct sev_es_save_area) != EXPECTED_SEV_ES_SAVE_AREA_SIZE); + BUILD_BUG_ON(sizeof(struct vmcb_control_area) != EXPECTED_VMCB_CONTROL_AREA_SIZE); + BUILD_BUG_ON(offsetof(struct vmcb, save) != EXPECTED_VMCB_CONTROL_AREA_SIZE); + BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE); + + /* Check offsets of reserved fields */ + + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0xa0); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0xcc); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0xd8); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0x180); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0x248); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0x298); + + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0xc8); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0xcc); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0xd8); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x1c0); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x248); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x298); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x300); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x320); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x380); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x3f0); + + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x0); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0xcc); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x148); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x168); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x180); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x1e0); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x200); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x320); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x380); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x3b0); + + BUILD_BUG_RESERVED_OFFSET(ghcb, 0xff0); +} #define SVM_CPUID_FUNC 0x8000000a -#define SVM_VM_CR_SVM_DISABLE 4 - #define SVM_SELECTOR_S_SHIFT 4 #define SVM_SELECTOR_DPL_SHIFT 5 #define SVM_SELECTOR_P_SHIFT 7 @@ -232,32 +620,6 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK #define SVM_SELECTOR_CODE_MASK (1 << 3) -#define INTERCEPT_CR0_READ 0 -#define INTERCEPT_CR3_READ 3 -#define INTERCEPT_CR4_READ 4 -#define INTERCEPT_CR8_READ 8 -#define INTERCEPT_CR0_WRITE (16 + 0) -#define INTERCEPT_CR3_WRITE (16 + 3) -#define INTERCEPT_CR4_WRITE (16 + 4) -#define INTERCEPT_CR8_WRITE (16 + 8) - -#define INTERCEPT_DR0_READ 0 -#define INTERCEPT_DR1_READ 1 -#define INTERCEPT_DR2_READ 2 -#define INTERCEPT_DR3_READ 3 -#define INTERCEPT_DR4_READ 4 -#define INTERCEPT_DR5_READ 5 -#define INTERCEPT_DR6_READ 6 -#define INTERCEPT_DR7_READ 7 -#define INTERCEPT_DR0_WRITE (16 + 0) -#define INTERCEPT_DR1_WRITE (16 + 1) -#define INTERCEPT_DR2_WRITE (16 + 2) -#define INTERCEPT_DR3_WRITE (16 + 3) -#define INTERCEPT_DR4_WRITE (16 + 4) -#define INTERCEPT_DR5_WRITE (16 + 5) -#define INTERCEPT_DR6_WRITE (16 + 6) -#define INTERCEPT_DR7_WRITE (16 + 7) - #define SVM_EVTINJ_VEC_MASK 0xff #define 
SVM_EVTINJ_TYPE_SHIFT 8 @@ -290,4 +652,58 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) +/* GHCB Accessor functions */ + +#define GHCB_BITMAP_IDX(field) \ + (offsetof(struct ghcb_save_area, field) / sizeof(u64)) + +#define DEFINE_GHCB_ACCESSORS(field) \ + static __always_inline bool ghcb_##field##_is_valid(const struct ghcb *ghcb) \ + { \ + return test_bit(GHCB_BITMAP_IDX(field), \ + (unsigned long *)&ghcb->save.valid_bitmap); \ + } \ + \ + static __always_inline u64 ghcb_get_##field(struct ghcb *ghcb) \ + { \ + return ghcb->save.field; \ + } \ + \ + static __always_inline u64 ghcb_get_##field##_if_valid(struct ghcb *ghcb) \ + { \ + return ghcb_##field##_is_valid(ghcb) ? ghcb->save.field : 0; \ + } \ + \ + static __always_inline void ghcb_set_##field(struct ghcb *ghcb, u64 value) \ + { \ + __set_bit(GHCB_BITMAP_IDX(field), \ + (unsigned long *)&ghcb->save.valid_bitmap); \ + ghcb->save.field = value; \ + } + +DEFINE_GHCB_ACCESSORS(cpl) +DEFINE_GHCB_ACCESSORS(rip) +DEFINE_GHCB_ACCESSORS(rsp) +DEFINE_GHCB_ACCESSORS(rax) +DEFINE_GHCB_ACCESSORS(rcx) +DEFINE_GHCB_ACCESSORS(rdx) +DEFINE_GHCB_ACCESSORS(rbx) +DEFINE_GHCB_ACCESSORS(rbp) +DEFINE_GHCB_ACCESSORS(rsi) +DEFINE_GHCB_ACCESSORS(rdi) +DEFINE_GHCB_ACCESSORS(r8) +DEFINE_GHCB_ACCESSORS(r9) +DEFINE_GHCB_ACCESSORS(r10) +DEFINE_GHCB_ACCESSORS(r11) +DEFINE_GHCB_ACCESSORS(r12) +DEFINE_GHCB_ACCESSORS(r13) +DEFINE_GHCB_ACCESSORS(r14) +DEFINE_GHCB_ACCESSORS(r15) +DEFINE_GHCB_ACCESSORS(sw_exit_code) +DEFINE_GHCB_ACCESSORS(sw_exit_info_1) +DEFINE_GHCB_ACCESSORS(sw_exit_info_2) +DEFINE_GHCB_ACCESSORS(sw_scratch) +DEFINE_GHCB_ACCESSORS(xcr0) +DEFINE_GHCB_ACCESSORS(xss) + #endif diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h deleted file mode 100644 index ff6c92eff035..000000000000 --- a/arch/x86/include/asm/swiotlb.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_SWIOTLB_H -#define _ASM_X86_SWIOTLB_H - -#include <linux/swiotlb.h> - -#ifdef CONFIG_SWIOTLB -extern int swiotlb; -extern int __init pci_swiotlb_detect_override(void); -extern int __init pci_swiotlb_detect_4gb(void); -extern void __init pci_swiotlb_init(void); -extern void __init pci_swiotlb_late_init(void); -#else -#define swiotlb 0 -static inline int pci_swiotlb_detect_override(void) -{ - return 0; -} -static inline int pci_swiotlb_detect_4gb(void) -{ - return 0; -} -static inline void pci_swiotlb_init(void) -{ -} -static inline void pci_swiotlb_late_init(void) -{ -} -#endif -#endif /* _ASM_X86_SWIOTLB_H */ diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 7cf1a270d891..499b1c15cc8b 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -12,28 +12,9 @@ struct task_struct *__switch_to_asm(struct task_struct *prev, __visible struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); -/* This runs runs on the previous thread's stack. */ -static inline void prepare_switch_to(struct task_struct *next) -{ -#ifdef CONFIG_VMAP_STACK - /* - * If we switch to a stack that has a top-level paging entry - * that is not present in the current mm, the resulting #PF will - * will be promoted to a double-fault and we'll panic. Probe - * the new stack now so that vmalloc_fault can fix up the page - * tables if needed. This can only happen if we use a stack - * in vmap space. - * - * We assume that the stack is aligned so that it never spans - * more than one top-level paging entry. 
- * - * To minimize cache pollution, just follow the stack pointer. - */ - READ_ONCE(*(unsigned char *)next->thread.sp); -#endif -} - -asmlinkage void ret_from_fork(void); +asmlinkage void ret_from_fork_asm(void); +__visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs, + int (*fn)(void *), void *fn_arg); /* * This is the structure pointed to by thread.sp for an inactive task. The @@ -46,6 +27,7 @@ struct inactive_task_frame { unsigned long r13; unsigned long r12; #else + unsigned long flags; unsigned long si; unsigned long di; #endif @@ -66,12 +48,12 @@ struct fork_frame { #define switch_to(prev, next, last) \ do { \ - prepare_switch_to(next); \ - \ ((last) = __switch_to_asm((prev), (next))); \ } while (0) #ifdef CONFIG_X86_32 +#include <asm/msr.h> + static inline void refresh_sysenter_cs(struct thread_struct *thread) { /* Only happens when SEP is enabled, no need to test "SEP"arately: */ @@ -79,7 +61,7 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread) return; this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs); - wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); + wrmsrq(MSR_IA32_SYSENTER_CS, thread->sysenter_cs); } #endif @@ -88,21 +70,23 @@ static inline void update_task_stack(struct task_struct *task) { /* sp0 always points to the entry trampoline stack, which is constant: */ #ifdef CONFIG_X86_32 - if (static_cpu_has(X86_FEATURE_XENPV)) - load_sp0(task->thread.sp0); - else - this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0); + this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0); #else - /* - * x86-64 updates x86_tss.sp1 via cpu_current_top_of_stack. That - * doesn't work on x86-32 because sp1 and - * cpu_current_top_of_stack have different values (because of - * the non-zero stack-padding on 32bit). - */ - if (static_cpu_has(X86_FEATURE_XENPV)) + if (!cpu_feature_enabled(X86_FEATURE_FRED) && cpu_feature_enabled(X86_FEATURE_XENPV)) + /* Xen PV enters the kernel on the thread stack. */ load_sp0(task_top_of_stack(task)); #endif +} +static inline void kthread_frame_init(struct inactive_task_frame *frame, + int (*fun)(void *), void *arg) +{ + frame->bx = (unsigned long)fun; +#ifdef CONFIG_X86_32 + frame->di = (unsigned long)arg; +#else + frame->r12 = (unsigned long)arg; +#endif } #endif /* _ASM_X86_SWITCH_TO_H */ diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h index 2fe745356fb1..cd21a0405ac5 100644 --- a/arch/x86/include/asm/sync_bitops.h +++ b/arch/x86/include/asm/sync_bitops.h @@ -14,6 +14,8 @@ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). 
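 *
 * Under this numbering, for example, sync_set_bit(33, addr) operates on
 * bit 1 of the long at (addr + 1).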
*/ +#include <asm/rmwcc.h> + #define ADDR (*(volatile long *)addr) /** @@ -29,7 +31,7 @@ */ static inline void sync_set_bit(long nr, volatile unsigned long *addr) { - asm volatile("lock; bts %1,%0" + asm volatile("lock " __ASM_SIZE(bts) " %1,%0" : "+m" (ADDR) : "Ir" (nr) : "memory"); @@ -47,7 +49,7 @@ static inline void sync_set_bit(long nr, volatile unsigned long *addr) */ static inline void sync_clear_bit(long nr, volatile unsigned long *addr) { - asm volatile("lock; btr %1,%0" + asm volatile("lock " __ASM_SIZE(btr) " %1,%0" : "+m" (ADDR) : "Ir" (nr) : "memory"); @@ -64,7 +66,7 @@ static inline void sync_clear_bit(long nr, volatile unsigned long *addr) */ static inline void sync_change_bit(long nr, volatile unsigned long *addr) { - asm volatile("lock; btc %1,%0" + asm volatile("lock " __ASM_SIZE(btc) " %1,%0" : "+m" (ADDR) : "Ir" (nr) : "memory"); @@ -78,14 +80,9 @@ static inline void sync_change_bit(long nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr) +static inline bool sync_test_and_set_bit(long nr, volatile unsigned long *addr) { - unsigned char oldbit; - - asm volatile("lock; bts %2,%1\n\tsetc %0" - : "=qm" (oldbit), "+m" (ADDR) - : "Ir" (nr) : "memory"); - return oldbit; + return GEN_BINARY_RMWcc("lock " __ASM_SIZE(bts), *addr, c, "Ir", nr); } /** @@ -98,12 +95,7 @@ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) { - unsigned char oldbit; - - asm volatile("lock; btr %2,%1\n\tsetc %0" - : "=qm" (oldbit), "+m" (ADDR) - : "Ir" (nr) : "memory"); - return oldbit; + return GEN_BINARY_RMWcc("lock " __ASM_SIZE(btr), *addr, c, "Ir", nr); } /** @@ -116,12 +108,7 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr) { - unsigned char oldbit; - - asm volatile("lock; btc %2,%1\n\tsetc %0" - : "=qm" (oldbit), "+m" (ADDR) - : "Ir" (nr) : "memory"); - return oldbit; + return GEN_BINARY_RMWcc("lock " __ASM_SIZE(btc), *addr, c, "Ir", nr); } #define sync_test_bit(nr, addr) test_bit(nr, addr) diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h index c67caafd3381..96bda43538ee 100644 --- a/arch/x86/include/asm/sync_core.h +++ b/arch/x86/include/asm/sync_core.h @@ -5,6 +5,88 @@ #include <linux/preempt.h> #include <asm/processor.h> #include <asm/cpufeature.h> +#include <asm/special_insns.h> + +#ifdef CONFIG_X86_32 +static __always_inline void iret_to_self(void) +{ + asm volatile ( + "pushfl\n\t" + "pushl %%cs\n\t" + "pushl $1f\n\t" + "iret\n\t" + "1:" + : ASM_CALL_CONSTRAINT : : "memory"); +} +#else +static __always_inline void iret_to_self(void) +{ + unsigned int tmp; + + asm volatile ( + "mov %%ss, %0\n\t" + "pushq %q0\n\t" + "pushq %%rsp\n\t" + "addq $8, (%%rsp)\n\t" + "pushfq\n\t" + "mov %%cs, %0\n\t" + "pushq %q0\n\t" + "pushq $1f\n\t" + "iretq\n\t" + "1:" + : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory"); +} +#endif /* CONFIG_X86_32 */ + +/* + * This function forces the icache and prefetched instruction stream to + * catch up with reality in two very specific cases: + * + * a) Text was modified using one virtual address and is about to be executed + * from the same physical page at a different virtual address. 
+ * + * b) Text was modified on a different CPU, may subsequently be + * executed on this CPU, and you want to make sure the new version + * gets executed. This generally means you're calling this in an IPI. + * + * If you're calling this for a different reason, you're probably doing + * it wrong. + * + * Like all of Linux's memory ordering operations, this is a + * compiler barrier as well. + */ +static __always_inline void sync_core(void) +{ + /* + * The SERIALIZE instruction is the most straightforward way to + * do this, but it is not universally available. + */ + if (static_cpu_has(X86_FEATURE_SERIALIZE)) { + serialize(); + return; + } + + /* + * For all other processors, there are quite a few ways to do this. + * IRET-to-self is nice because it works on every CPU, at any CPL + * (so it's compatible with paravirtualization), and it never exits + * to a hypervisor. The only downsides are that it's a bit slow + * (it seems to be a bit more than 2x slower than the fastest + * options) and that it unmasks NMIs. The "push %cs" is needed, + * because in paravirtual environments __KERNEL_CS may not be a + * valid CS value when we do IRET directly. + * + * In case NMI unmasking or performance ever becomes a problem, + * the next best option appears to be MOV-to-CR2 and an + * unconditional jump. That sequence also works on all CPUs, + * but it will fault at CPL3 (i.e. Xen PV). + * + * CPUID is the conventional way, but it's nasty: it doesn't + * exist on some 486-like CPUs, and it usually exits to a + * hypervisor. + */ + iret_to_self(); +} /* * Ensure that a core serializing instruction is issued before returning @@ -16,12 +98,13 @@ static inline void sync_core_before_usermode(void) /* With PTI, we unconditionally serialize before running user code. */ if (static_cpu_has(X86_FEATURE_PTI)) return; + /* - * Return from interrupt and NMI is done through iret, which is core - * serializing. + * Even if we're in an interrupt, we might reschedule before returning, + * in which case we could switch to a different thread in the same mm + * and return using SYSRET or SYSEXIT. Instead of trying to keep + * track of our need to sync the core, just sync right away. */ - if (in_irq() || in_nmi()) - return; sync_core(); } diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index d653139857af..c10dbb74cd00 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Access to user system call parameters and results * * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved. * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. - * * See asm-generic/syscall.h for descriptions of what we must do here. 
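 *
 * In short, the helpers below shuttle arguments between pt_regs and the
 * syscall: regs->bx, cx, dx, si, di, bp for 32-bit (and TS_COMPAT) tasks,
 * and regs->di, si, dx, r10, r8, r9 for 64-bit ones.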
*/
@@ -16,28 +13,20 @@
 #include <uapi/linux/audit.h>
 #include <linux/sched.h>
 #include <linux/err.h>
-#include <asm/asm-offsets.h> /* For NR_syscalls */
 #include <asm/thread_info.h> /* for TS_COMPAT */
 #include <asm/unistd.h>
 
-#ifdef CONFIG_X86_64
-typedef asmlinkage long (*sys_call_ptr_t)(const struct pt_regs *);
-#else
-typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long,
- unsigned long, unsigned long,
- unsigned long, unsigned long);
-#endif /* CONFIG_X86_64 */
+/* This is used purely for kernel/trace/trace_syscalls.c */
+typedef long (*sys_call_ptr_t)(const struct pt_regs *);
 extern const sys_call_ptr_t sys_call_table[];
 
-#if defined(CONFIG_X86_32)
-#define ia32_sys_call_table sys_call_table
-#define __NR_syscall_compat_max __NR_syscall_max
-#define IA32_NR_syscalls NR_syscalls
-#endif
-
-#if defined(CONFIG_IA32_EMULATION)
-extern const sys_call_ptr_t ia32_sys_call_table[];
-#endif
+/*
+ * These may not exist, but still put the prototypes in so we
+ * can use IS_ENABLED().
+ */
+extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
 
 /*
 * Only the low 32 bits of orig_ax are meaningful, so we return int.
@@ -49,6 +38,13 @@ static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
 return regs->orig_ax;
 }
 
+static inline void syscall_set_nr(struct task_struct *task,
+ struct pt_regs *regs,
+ int nr)
+{
+ regs->orig_ax = nr;
+}
+
 static inline void syscall_rollback(struct task_struct *task,
 struct pt_regs *regs)
 {
@@ -91,23 +87,29 @@ static inline void syscall_set_return_value(struct task_struct *task,
 
 static inline void syscall_get_arguments(struct task_struct *task,
 struct pt_regs *regs,
- unsigned int i, unsigned int n,
 unsigned long *args)
 {
- BUG_ON(i + n > 6);
- memcpy(args, &regs->bx + i, n * sizeof(args[0]));
+ args[0] = regs->bx;
+ args[1] = regs->cx;
+ args[2] = regs->dx;
+ args[3] = regs->si;
+ args[4] = regs->di;
+ args[5] = regs->bp;
 }
 
 static inline void syscall_set_arguments(struct task_struct *task,
 struct pt_regs *regs,
- unsigned int i, unsigned int n,
 const unsigned long *args)
 {
- BUG_ON(i + n > 6);
- memcpy(&regs->bx + i, args, n * sizeof(args[0]));
+ regs->bx = args[0];
+ regs->cx = args[1];
+ regs->dx = args[2];
+ regs->si = args[3];
+ regs->di = args[4];
+ regs->bp = args[5];
 }
 
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
 {
 return AUDIT_ARCH_I386;
 }
@@ -116,131 +118,67 @@ static inline int syscall_get_arch(void)
 
 static inline void syscall_get_arguments(struct task_struct *task,
 struct pt_regs *regs,
- unsigned int i, unsigned int n,
 unsigned long *args)
 {
# ifdef CONFIG_IA32_EMULATION
- if (task->thread_info.status & TS_COMPAT)
- switch (i) {
- case 0:
- if (!n--) break;
- *args++ = regs->bx;
- case 1:
- if (!n--) break;
- *args++ = regs->cx;
- case 2:
- if (!n--) break;
- *args++ = regs->dx;
- case 3:
- if (!n--) break;
- *args++ = regs->si;
- case 4:
- if (!n--) break;
- *args++ = regs->di;
- case 5:
- if (!n--) break;
- *args++ = regs->bp;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
- else
+ if (task->thread_info.status & TS_COMPAT) {
+ *args++ = regs->bx;
+ *args++ = regs->cx;
+ *args++ = regs->dx;
+ *args++ = regs->si;
+ *args++ = regs->di;
+ *args = regs->bp;
+ } else
# endif
- switch (i) {
- case 0:
- if (!n--) break;
- *args++ = regs->di;
- case 1:
- if (!n--) break;
- *args++ = regs->si;
- case 2:
- if
(!n--) break; - *args++ = regs->dx; - case 3: - if (!n--) break; - *args++ = regs->r10; - case 4: - if (!n--) break; - *args++ = regs->r8; - case 5: - if (!n--) break; - *args++ = regs->r9; - case 6: - if (!n--) break; - default: - BUG(); - break; - } + { + *args++ = regs->di; + *args++ = regs->si; + *args++ = regs->dx; + *args++ = regs->r10; + *args++ = regs->r8; + *args = regs->r9; + } } static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread_info.status & TS_COMPAT) - switch (i) { - case 0: - if (!n--) break; - regs->bx = *args++; - case 1: - if (!n--) break; - regs->cx = *args++; - case 2: - if (!n--) break; - regs->dx = *args++; - case 3: - if (!n--) break; - regs->si = *args++; - case 4: - if (!n--) break; - regs->di = *args++; - case 5: - if (!n--) break; - regs->bp = *args++; - case 6: - if (!n--) break; - default: - BUG(); - break; - } - else + if (task->thread_info.status & TS_COMPAT) { + regs->bx = *args++; + regs->cx = *args++; + regs->dx = *args++; + regs->si = *args++; + regs->di = *args++; + regs->bp = *args; + } else # endif - switch (i) { - case 0: - if (!n--) break; - regs->di = *args++; - case 1: - if (!n--) break; - regs->si = *args++; - case 2: - if (!n--) break; - regs->dx = *args++; - case 3: - if (!n--) break; - regs->r10 = *args++; - case 4: - if (!n--) break; - regs->r8 = *args++; - case 5: - if (!n--) break; - regs->r9 = *args++; - case 6: - if (!n--) break; - default: - BUG(); - break; - } + { + regs->di = *args++; + regs->si = *args++; + regs->dx = *args++; + regs->r10 = *args++; + regs->r8 = *args++; + regs->r9 = *args; + } } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { /* x32 tasks should be considered AUDIT_ARCH_X86_64. */ - return in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; + return (IS_ENABLED(CONFIG_IA32_EMULATION) && + task->thread_info.status & TS_COMPAT) + ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; } + +bool do_syscall_64(struct pt_regs *regs, int nr); +void do_int80_emulation(struct pt_regs *regs); + #endif /* CONFIG_X86_32 */ +void do_int80_syscall_32(struct pt_regs *regs); +bool do_fast_syscall_32(struct pt_regs *regs); +bool do_SYSENTER_32(struct pt_regs *regs); + #endif /* _ASM_X86_SYSCALL_H */ diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index e046a405743d..7e88705e907f 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -6,86 +6,183 @@ #ifndef _ASM_X86_SYSCALL_WRAPPER_H #define _ASM_X86_SYSCALL_WRAPPER_H +#include <asm/ptrace.h> + +extern long __x64_sys_ni_syscall(const struct pt_regs *regs); +extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); + +/* + * Instead of the generic __SYSCALL_DEFINEx() definition, the x86 version takes + * struct pt_regs *regs as the only argument of the syscall stub(s) named as: + * __x64_sys_*() - 64-bit native syscall + * __ia32_sys_*() - 32-bit native syscall or common compat syscall + * __ia32_compat_sys_*() - 32-bit compat syscall + * __x64_compat_sys_*() - 64-bit X32 compat syscall + * + * The registers are decoded according to the ABI: + * 64-bit: RDI, RSI, RDX, R10, R8, R9 + * 32-bit: EBX, ECX, EDX, ESI, EDI, EBP + * + * The stub then passes the decoded arguments to the __se_sys_*() wrapper to + * perform sign-extension (omitted for zero-argument syscalls). 
Finally, the
+ * arguments are passed to the __do_sys_*() function, which is the actual
+ * syscall. These wrappers are marked as inline so the compiler can optimize
+ * the functions where appropriate.
+ *
+ * Example assembly (slightly re-ordered for better readability):
+ *
+ * <__x64_sys_recv>: <-- syscall with 4 parameters
+ * callq <__fentry__>
+ *
+ * mov 0x70(%rdi),%rdi <-- decode regs->di
+ * mov 0x68(%rdi),%rsi <-- decode regs->si
+ * mov 0x60(%rdi),%rdx <-- decode regs->dx
+ * mov 0x38(%rdi),%rcx <-- decode regs->r10
+ *
+ * xor %r9d,%r9d <-- clear %r9
+ * xor %r8d,%r8d <-- clear %r8
+ *
+ * callq __sys_recvfrom <-- do the actual work in __sys_recvfrom()
+ * which takes 6 arguments
+ *
+ * cltq <-- extend return value to 64-bit
+ * retq <-- return
+ *
+ * This approach avoids leaking random user-provided register content down
+ * the call chain.
+ */
+
 /* Mapping of registers to parameters for syscalls on x86-64 and x32 */
 #define SC_X86_64_REGS_TO_ARGS(x, ...) \
 __MAP(x,__SC_ARGS \
 ,,regs->di,,regs->si,,regs->dx \
 ,,regs->r10,,regs->r8,,regs->r9) \
+
+/* SYSCALL_PT_ARGS is adapted from s390x */
+#define SYSCALL_PT_ARG6(m, t1, t2, t3, t4, t5, t6) \
+ SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5), m(t6, (regs->bp))
+#define SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5) \
+ SYSCALL_PT_ARG4(m, t1, t2, t3, t4), m(t5, (regs->di))
+#define SYSCALL_PT_ARG4(m, t1, t2, t3, t4) \
+ SYSCALL_PT_ARG3(m, t1, t2, t3), m(t4, (regs->si))
+#define SYSCALL_PT_ARG3(m, t1, t2, t3) \
+ SYSCALL_PT_ARG2(m, t1, t2), m(t3, (regs->dx))
+#define SYSCALL_PT_ARG2(m, t1, t2) \
+ SYSCALL_PT_ARG1(m, t1), m(t2, (regs->cx))
+#define SYSCALL_PT_ARG1(m, t1) m(t1, (regs->bx))
+#define SYSCALL_PT_ARGS(x, ...) SYSCALL_PT_ARG##x(__VA_ARGS__)
+
+#define __SC_COMPAT_CAST(t, a) \
+ (__typeof(__builtin_choose_expr(__TYPE_IS_L(t), 0, 0U))) \
+ (unsigned int)a
+
 /* Mapping of registers to parameters for syscalls on i386 */
 #define SC_IA32_REGS_TO_ARGS(x, ...) \
- __MAP(x,__SC_ARGS \
- ,,(unsigned int)regs->bx,,(unsigned int)regs->cx \
- ,,(unsigned int)regs->dx,,(unsigned int)regs->si \
- ,,(unsigned int)regs->di,,(unsigned int)regs->bp)
+ SYSCALL_PT_ARGS(x, __SC_COMPAT_CAST, \
+ __MAP(x, __SC_TYPE, __VA_ARGS__)) \
 
-#ifdef CONFIG_IA32_EMULATION
-/*
- * For IA32 emulation, we need to handle "compat" syscalls *and* create
- * additional wrappers (aptly named __ia32_sys_xyzzy) which decode the
- * ia32 regs in the proper order for shared or "common" syscalls. As some
- * syscalls may not be implemented, we need to expand COND_SYSCALL in
- * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this
- * case as well.
- */
-#define __IA32_COMPAT_SYS_STUBx(x, name, ...) \
- asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs);\
- ALLOW_ERROR_INJECTION(__ia32_compat_sys##name, ERRNO); \
- asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs)\
+#define __SYS_STUB0(abi, name) \
+ long __##abi##_##name(const struct pt_regs *regs); \
+ ALLOW_ERROR_INJECTION(__##abi##_##name, ERRNO); \
+ long __##abi##_##name(const struct pt_regs *regs) \
+ __alias(__do_##name);
+
+#define __SYS_STUBx(abi, name, ...) \
+ long __##abi##_##name(const struct pt_regs *regs); \
+ ALLOW_ERROR_INJECTION(__##abi##_##name, ERRNO); \
+ long __##abi##_##name(const struct pt_regs *regs) \
 { \
- return __se_compat_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\
- } \
+ return __se_##name(__VA_ARGS__); \
+ }
 
-#define __IA32_SYS_STUBx(x, name, ...)
\ - asmlinkage long __ia32_sys##name(const struct pt_regs *regs); \ - ALLOW_ERROR_INJECTION(__ia32_sys##name, ERRNO); \ - asmlinkage long __ia32_sys##name(const struct pt_regs *regs) \ +#define __COND_SYSCALL(abi, name) \ + __weak long __##abi##_##name(const struct pt_regs *__unused); \ + __weak long __##abi##_##name(const struct pt_regs *__unused) \ { \ - return __se_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\ + return sys_ni_syscall(); \ } +#ifdef CONFIG_X86_64 +#define __X64_SYS_STUB0(name) \ + __SYS_STUB0(x64, sys_##name) + +#define __X64_SYS_STUBx(x, name, ...) \ + __SYS_STUBx(x64, sys##name, \ + SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__)) + +#define __X64_COND_SYSCALL(name) \ + __COND_SYSCALL(x64, sys_##name) + +#else /* CONFIG_X86_64 */ +#define __X64_SYS_STUB0(name) +#define __X64_SYS_STUBx(x, name, ...) +#define __X64_COND_SYSCALL(name) +#endif /* CONFIG_X86_64 */ + +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) +#define __IA32_SYS_STUB0(name) \ + __SYS_STUB0(ia32, sys_##name) + +#define __IA32_SYS_STUBx(x, name, ...) \ + __SYS_STUBx(ia32, sys##name, \ + SC_IA32_REGS_TO_ARGS(x, __VA_ARGS__)) + +#define __IA32_COND_SYSCALL(name) \ + __COND_SYSCALL(ia32, sys_##name) + +#else /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ +#define __IA32_SYS_STUB0(name) +#define __IA32_SYS_STUBx(x, name, ...) +#define __IA32_COND_SYSCALL(name) +#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ + +#ifdef CONFIG_IA32_EMULATION /* - * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias - * named __ia32_sys_*() + * For IA32 emulation, we need to handle "compat" syscalls *and* create + * additional wrappers (aptly named __ia32_sys_xyzzy) which decode the + * ia32 regs in the proper order for shared or "common" syscalls. As some + * syscalls may not be implemented, we need to expand COND_SYSCALL in + * kernel/sys_ni.c to cover this case as well. */ -#define SYSCALL_DEFINE0(sname) \ - SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __x64_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ - SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \ - asmlinkage long __x64_sys_##sname(void) +#define __IA32_COMPAT_SYS_STUB0(name) \ + __SYS_STUB0(ia32, compat_sys_##name) -#define COND_SYSCALL(name) \ - cond_syscall(__x64_sys_##name); \ - cond_syscall(__ia32_sys_##name) +#define __IA32_COMPAT_SYS_STUBx(x, name, ...) \ + __SYS_STUBx(ia32, compat_sys##name, \ + SC_IA32_REGS_TO_ARGS(x, __VA_ARGS__)) -#define SYS_NI(name) \ - SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers); \ - SYSCALL_ALIAS(__ia32_sys_##name, sys_ni_posix_timers) +#define __IA32_COMPAT_COND_SYSCALL(name) \ + __COND_SYSCALL(ia32, compat_sys_##name) #else /* CONFIG_IA32_EMULATION */ +#define __IA32_COMPAT_SYS_STUB0(name) #define __IA32_COMPAT_SYS_STUBx(x, name, ...) -#define __IA32_SYS_STUBx(x, fullname, name, ...) +#define __IA32_COMPAT_COND_SYSCALL(name) #endif /* CONFIG_IA32_EMULATION */ -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI /* * For the x32 ABI, we need to create a stub for compat_sys_*() which is aware * of the x86-64-style parameter ordering of x32 syscalls. The syscalls common * with x86_64 obviously do not need such care. */ +#define __X32_COMPAT_SYS_STUB0(name) \ + __SYS_STUB0(x64, compat_sys_##name) + #define __X32_COMPAT_SYS_STUBx(x, name, ...) 
\ - asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs);\ - ALLOW_ERROR_INJECTION(__x32_compat_sys##name, ERRNO); \ - asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs)\ - { \ - return __se_compat_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ - } \ + __SYS_STUBx(x64, compat_sys##name, \ + SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__)) + +#define __X32_COMPAT_COND_SYSCALL(name) \ + __COND_SYSCALL(x64, compat_sys_##name) -#else /* CONFIG_X86_X32 */ +#else /* CONFIG_X86_X32_ABI */ +#define __X32_COMPAT_SYS_STUB0(name) #define __X32_COMPAT_SYS_STUBx(x, name, ...) -#endif /* CONFIG_X86_X32 */ +#define __X32_COMPAT_COND_SYSCALL(name) +#endif /* CONFIG_X86_X32_ABI */ #ifdef CONFIG_COMPAT @@ -94,6 +191,14 @@ * mapping of registers to parameters, we need to generate stubs for each * of them. */ +#define COMPAT_SYSCALL_DEFINE0(name) \ + static long \ + __do_compat_sys_##name(const struct pt_regs *__unused); \ + __IA32_COMPAT_SYS_STUB0(name) \ + __X32_COMPAT_SYS_STUB0(name) \ + static long \ + __do_compat_sys_##name(const struct pt_regs *__unused) + #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ @@ -107,62 +212,18 @@ /* * As some compat syscalls may not be implemented, we need to expand - * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in - * kernel/time/posix-stubs.c to cover this case as well. + * COND_SYSCALL_COMPAT in kernel/sys_ni.c to cover this case as well. */ #define COND_SYSCALL_COMPAT(name) \ - cond_syscall(__ia32_compat_sys_##name); \ - cond_syscall(__x32_compat_sys_##name) - -#define COMPAT_SYS_NI(name) \ - SYSCALL_ALIAS(__ia32_compat_sys_##name, sys_ni_posix_timers); \ - SYSCALL_ALIAS(__x32_compat_sys_##name, sys_ni_posix_timers) + __IA32_COMPAT_COND_SYSCALL(name) \ + __X32_COMPAT_COND_SYSCALL(name) #endif /* CONFIG_COMPAT */ - -/* - * Instead of the generic __SYSCALL_DEFINEx() definition, this macro takes - * struct pt_regs *regs as the only argument of the syscall stub named - * __x64_sys_*(). It decodes just the registers it needs and passes them on to - * the __se_sys_*() wrapper performing sign extension and then to the - * __do_sys_*() function doing the actual job. These wrappers and functions - * are inlined (at least in very most cases), meaning that the assembly looks - * as follows (slightly re-ordered for better readability): - * - * <__x64_sys_recv>: <-- syscall with 4 parameters - * callq <__fentry__> - * - * mov 0x70(%rdi),%rdi <-- decode regs->di - * mov 0x68(%rdi),%rsi <-- decode regs->si - * mov 0x60(%rdi),%rdx <-- decode regs->dx - * mov 0x38(%rdi),%rcx <-- decode regs->r10 - * - * xor %r9d,%r9d <-- clear %r9 - * xor %r8d,%r8d <-- clear %r8 - * - * callq __sys_recvfrom <-- do the actual work in __sys_recvfrom() - * which takes 6 arguments - * - * cltq <-- extend return value to 64-bit - * retq <-- return - * - * This approach avoids leaking random user-provided register content down - * the call chain. - * - * If IA32_EMULATION is enabled, this macro generates an additional wrapper - * named __ia32_sys_*() which decodes the struct pt_regs *regs according - * to the i386 calling convention (bx, cx, dx, si, di, bp). - */ #define __SYSCALL_DEFINEx(x, name, ...) 
\ - asmlinkage long __x64_sys##name(const struct pt_regs *regs); \ - ALLOW_ERROR_INJECTION(__x64_sys##name, ERRNO); \ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ - asmlinkage long __x64_sys##name(const struct pt_regs *regs) \ - { \ - return __se_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ - } \ + __X64_SYS_STUBx(x, name, __VA_ARGS__) \ __IA32_SYS_STUBx(x, name, __VA_ARGS__) \ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ @@ -177,33 +238,27 @@ * As the generic SYSCALL_DEFINE0() macro does not decode any parameters for * obvious reasons, and passing struct pt_regs *regs to it in %rdi does not * hurt, we only need to re-define it here to keep the naming congruent to - * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() and SYS_NI() - * macros to work correctly. + * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() macro + * to work correctly. */ -#ifndef SYSCALL_DEFINE0 -#define SYSCALL_DEFINE0(sname) \ - SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __x64_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ - asmlinkage long __x64_sys_##sname(void) -#endif +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + static long __do_sys_##sname(const struct pt_regs *__unused); \ + __X64_SYS_STUB0(sname) \ + __IA32_SYS_STUB0(sname) \ + static long __do_sys_##sname(const struct pt_regs *__unused) -#ifndef COND_SYSCALL -#define COND_SYSCALL(name) cond_syscall(__x64_sys_##name) -#endif - -#ifndef SYS_NI -#define SYS_NI(name) SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers); -#endif +#define COND_SYSCALL(name) \ + __X64_COND_SYSCALL(name) \ + __IA32_COND_SYSCALL(name) /* * For VSYSCALLS, we need to declare these three syscalls with the new * pt_regs-based calling convention for in-kernel use. */ -struct pt_regs; -asmlinkage long __x64_sys_getcpu(const struct pt_regs *regs); -asmlinkage long __x64_sys_gettimeofday(const struct pt_regs *regs); -asmlinkage long __x64_sys_time(const struct pt_regs *regs); +long __x64_sys_getcpu(const struct pt_regs *regs); +long __x64_sys_gettimeofday(const struct pt_regs *regs); +long __x64_sys_time(const struct pt_regs *regs); #endif /* _ASM_X86_SYSCALL_WRAPPER_H */ diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 9fa979dd0d9d..6714a358235d 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -1,51 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * syscalls.h - Linux syscall interfaces (arch-specific) * * Copyright (c) 2008 Jaswinder Singh Rajput - * - * This file is released under the GPLv2. - * See the file COPYING for more details. 
*/ #ifndef _ASM_X86_SYSCALLS_H #define _ASM_X86_SYSCALLS_H -#include <linux/compiler.h> -#include <linux/linkage.h> -#include <linux/signal.h> -#include <linux/types.h> - /* Common in X86_32 and X86_64 */ /* kernel/ioport.c */ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on); -#ifdef CONFIG_X86_32 -/* - * These definitions are only valid on pure 32-bit systems; x86-64 uses a - * different syscall calling convention - */ -asmlinkage long sys_ioperm(unsigned long, unsigned long, int); -asmlinkage long sys_iopl(unsigned int); - -/* kernel/ldt.c */ -asmlinkage long sys_modify_ldt(int, void __user *, unsigned long); - -/* kernel/signal.c */ -asmlinkage long sys_rt_sigreturn(void); - -/* kernel/tls.c */ -asmlinkage long sys_set_thread_area(struct user_desc __user *); -asmlinkage long sys_get_thread_area(struct user_desc __user *); - -/* X86_32 only */ - -/* kernel/signal.c */ -asmlinkage long sys_sigreturn(void); - -/* kernel/vm86_32.c */ -struct vm86_struct; -asmlinkage long sys_vm86old(struct vm86_struct __user *); -asmlinkage long sys_vm86(unsigned long, unsigned long); - -#endif /* CONFIG_X86_32 */ #endif /* _ASM_X86_SYSCALLS_H */ diff --git a/arch/x86/include/asm/sysfb.h b/arch/x86/include/asm/sysfb.h deleted file mode 100644 index 2aeb3e25579c..000000000000 --- a/arch/x86/include/asm/sysfb.h +++ /dev/null @@ -1,98 +0,0 @@ -#ifndef _ARCH_X86_KERNEL_SYSFB_H -#define _ARCH_X86_KERNEL_SYSFB_H - -/* - * Generic System Framebuffers on x86 - * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - */ - -#include <linux/kernel.h> -#include <linux/platform_data/simplefb.h> -#include <linux/screen_info.h> - -enum { - M_I17, /* 17-Inch iMac */ - M_I20, /* 20-Inch iMac */ - M_I20_SR, /* 20-Inch iMac (Santa Rosa) */ - M_I24, /* 24-Inch iMac */ - M_I24_8_1, /* 24-Inch iMac, 8,1th gen */ - M_I24_10_1, /* 24-Inch iMac, 10,1th gen */ - M_I27_11_1, /* 27-Inch iMac, 11,1th gen */ - M_MINI, /* Mac Mini */ - M_MINI_3_1, /* Mac Mini, 3,1th gen */ - M_MINI_4_1, /* Mac Mini, 4,1th gen */ - M_MB, /* MacBook */ - M_MB_2, /* MacBook, 2nd rev. */ - M_MB_3, /* MacBook, 3rd rev. */ - M_MB_5_1, /* MacBook, 5th rev. */ - M_MB_6_1, /* MacBook, 6th rev. */ - M_MB_7_1, /* MacBook, 7th rev. 
*/ - M_MB_SR, /* MacBook, 2nd gen, (Santa Rosa) */ - M_MBA, /* MacBook Air */ - M_MBA_3, /* Macbook Air, 3rd rev */ - M_MBP, /* MacBook Pro */ - M_MBP_2, /* MacBook Pro 2nd gen */ - M_MBP_2_2, /* MacBook Pro 2,2nd gen */ - M_MBP_SR, /* MacBook Pro (Santa Rosa) */ - M_MBP_4, /* MacBook Pro, 4th gen */ - M_MBP_5_1, /* MacBook Pro, 5,1th gen */ - M_MBP_5_2, /* MacBook Pro, 5,2th gen */ - M_MBP_5_3, /* MacBook Pro, 5,3rd gen */ - M_MBP_6_1, /* MacBook Pro, 6,1th gen */ - M_MBP_6_2, /* MacBook Pro, 6,2th gen */ - M_MBP_7_1, /* MacBook Pro, 7,1th gen */ - M_MBP_8_2, /* MacBook Pro, 8,2nd gen */ - M_UNKNOWN /* placeholder */ -}; - -struct efifb_dmi_info { - char *optname; - unsigned long base; - int stride; - int width; - int height; - int flags; -}; - -#ifdef CONFIG_EFI - -extern struct efifb_dmi_info efifb_dmi_list[]; -void sysfb_apply_efi_quirks(void); - -#else /* CONFIG_EFI */ - -static inline void sysfb_apply_efi_quirks(void) -{ -} - -#endif /* CONFIG_EFI */ - -#ifdef CONFIG_X86_SYSFB - -bool parse_mode(const struct screen_info *si, - struct simplefb_platform_data *mode); -int create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode); - -#else /* CONFIG_X86_SYSFB */ - -static inline bool parse_mode(const struct screen_info *si, - struct simplefb_platform_data *mode) -{ - return false; -} - -static inline int create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode) -{ - return -EINVAL; -} - -#endif /* CONFIG_X86_SYSFB */ - -#endif /* _ARCH_X86_KERNEL_SYSFB_H */ diff --git a/arch/x86/include/asm/tce.h b/arch/x86/include/asm/tce.h deleted file mode 100644 index 7a6677c1a715..000000000000 --- a/arch/x86/include/asm/tce.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * This file is derived from asm-powerpc/tce.h. - * - * Copyright (C) IBM Corporation, 2006 - * - * Author: Muli Ben-Yehuda <muli@il.ibm.com> - * Author: Jon Mason <jdmason@us.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _ASM_X86_TCE_H
-#define _ASM_X86_TCE_H
-
-extern unsigned int specified_table_size;
-struct iommu_table;
-
-#define TCE_ENTRY_SIZE   8   /* in bytes */
-
-#define TCE_READ_SHIFT   0
-#define TCE_WRITE_SHIFT  1
-#define TCE_HUBID_SHIFT  2   /* unused */
-#define TCE_RSVD_SHIFT   8   /* unused */
-#define TCE_RPN_SHIFT    12
-#define TCE_UNUSED_SHIFT 48  /* unused */
-
-#define TCE_RPN_MASK     0x0000fffffffff000ULL
-
-extern void tce_build(struct iommu_table *tbl, unsigned long index,
-		      unsigned int npages, unsigned long uaddr, int direction);
-extern void tce_free(struct iommu_table *tbl, long index, unsigned int npages);
-extern void * __init alloc_tce_table(void);
-extern void __init free_tce_table(void *tbl);
-extern int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar);
-
-#endif /* _ASM_X86_TCE_H */
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
new file mode 100644
index 000000000000..6b338d7f01b7
--- /dev/null
+++ b/arch/x86/include/asm/tdx.h
@@ -0,0 +1,240 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021-2022 Intel Corporation */
+#ifndef _ASM_X86_TDX_H
+#define _ASM_X86_TDX_H
+
+#include <linux/init.h>
+#include <linux/bits.h>
+#include <linux/mmzone.h>
+
+#include <asm/errno.h>
+#include <asm/ptrace.h>
+#include <asm/trapnr.h>
+#include <asm/shared/tdx.h>
+
+/*
+ * SW-defined error codes.
+ *
+ * Bits 47:40 == 0xFF indicate a reserved status code class that is never
+ * used by the TDX module.
+ */
+#define TDX_ERROR			_BITUL(63)
+#define TDX_NON_RECOVERABLE		_BITUL(62)
+#define TDX_SW_ERROR			(TDX_ERROR | GENMASK_ULL(47, 40))
+#define TDX_SEAMCALL_VMFAILINVALID	(TDX_SW_ERROR | _UL(0xFFFF0000))
+
+#define TDX_SEAMCALL_GP			(TDX_SW_ERROR | X86_TRAP_GP)
+#define TDX_SEAMCALL_UD			(TDX_SW_ERROR | X86_TRAP_UD)
+
+/*
+ * TDX module SEAMCALL leaf function error codes
+ */
+#define TDX_SUCCESS		0ULL
+#define TDX_RND_NO_ENTROPY	0x8000020300000000ULL
+
+#ifndef __ASSEMBLER__
+
+#include <uapi/asm/mce.h>
+#include <asm/tdx_global_metadata.h>
+#include <linux/pgtable.h>
+
+/*
+ * Used by the #VE exception handler to gather the #VE exception
+ * info from the TDX module. This is a software-only structure
+ * and not part of the TDX module/VMM ABI.
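+ *
+ * tdx_get_ve_info() below fills it in, retrieving the data from the
+ * TDX module via the TDG.VP.VEINFO.GET TDCALL.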
+ */ +struct ve_info { + u64 exit_reason; + u64 exit_qual; + /* Guest Linear (virtual) Address */ + u64 gla; + /* Guest Physical Address */ + u64 gpa; + u32 instr_len; + u32 instr_info; +}; + +#ifdef CONFIG_INTEL_TDX_GUEST + +void __init tdx_early_init(void); + +void tdx_get_ve_info(struct ve_info *ve); + +bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve); + +void tdx_halt(void); + +bool tdx_early_handle_ve(struct pt_regs *regs); + +int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport); + +int tdx_mcall_extend_rtmr(u8 index, u8 *data); + +u64 tdx_hcall_get_quote(u8 *buf, size_t size); + +void __init tdx_dump_attributes(u64 td_attr); +void __init tdx_dump_td_ctls(u64 td_ctls); + +#else + +static inline void tdx_early_init(void) { }; +static inline void tdx_halt(void) { }; + +static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; } + +#endif /* CONFIG_INTEL_TDX_GUEST */ + +#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_INTEL_TDX_GUEST) +long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2, + unsigned long p3, unsigned long p4); +#else +static inline long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4) +{ + return -ENODEV; +} +#endif /* CONFIG_INTEL_TDX_GUEST && CONFIG_KVM_GUEST */ + +#ifdef CONFIG_INTEL_TDX_HOST +u64 __seamcall(u64 fn, struct tdx_module_args *args); +u64 __seamcall_ret(u64 fn, struct tdx_module_args *args); +u64 __seamcall_saved_ret(u64 fn, struct tdx_module_args *args); +void tdx_init(void); + +#include <linux/preempt.h> +#include <asm/archrandom.h> +#include <asm/processor.h> + +typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args); + +static __always_inline u64 __seamcall_dirty_cache(sc_func_t func, u64 fn, + struct tdx_module_args *args) +{ + lockdep_assert_preemption_disabled(); + + /* + * SEAMCALLs are made to the TDX module and can generate dirty + * cachelines of TDX private memory. Mark cache state incoherent + * so that the cache can be flushed during kexec. + * + * This needs to be done before actually making the SEAMCALL, + * because kexec-ing CPU could send NMI to stop remote CPUs, + * in which case even disabling IRQ won't help here. 
+ */ + this_cpu_write(cache_state_incoherent, true); + + return func(fn, args); +} + +static __always_inline u64 sc_retry(sc_func_t func, u64 fn, + struct tdx_module_args *args) +{ + int retry = RDRAND_RETRY_LOOPS; + u64 ret; + + do { + preempt_disable(); + ret = __seamcall_dirty_cache(func, fn, args); + preempt_enable(); + } while (ret == TDX_RND_NO_ENTROPY && --retry); + + return ret; +} + +#define seamcall(_fn, _args) sc_retry(__seamcall, (_fn), (_args)) +#define seamcall_ret(_fn, _args) sc_retry(__seamcall_ret, (_fn), (_args)) +#define seamcall_saved_ret(_fn, _args) sc_retry(__seamcall_saved_ret, (_fn), (_args)) +int tdx_cpu_enable(void); +int tdx_enable(void); +const char *tdx_dump_mce_info(struct mce *m); +const struct tdx_sys_info *tdx_get_sysinfo(void); + +int tdx_guest_keyid_alloc(void); +u32 tdx_get_nr_guest_keyids(void); +void tdx_guest_keyid_free(unsigned int keyid); + +void tdx_quirk_reset_page(struct page *page); + +struct tdx_td { + /* TD root structure: */ + struct page *tdr_page; + + int tdcs_nr_pages; + /* TD control structure: */ + struct page **tdcs_pages; + + /* Size of `tdcx_pages` in struct tdx_vp */ + int tdcx_nr_pages; +}; + +struct tdx_vp { + /* TDVP root page */ + struct page *tdvpr_page; + /* precalculated page_to_phys(tdvpr_page) for use in noinstr code */ + phys_addr_t tdvpr_pa; + + /* TD vCPU control structure: */ + struct page **tdcx_pages; +}; + +static inline u64 mk_keyed_paddr(u16 hkid, struct page *page) +{ + u64 ret; + + ret = page_to_phys(page); + /* KeyID bits are just above the physical address bits: */ + ret |= (u64)hkid << boot_cpu_data.x86_phys_bits; + + return ret; +} + +static inline int pg_level_to_tdx_sept_level(enum pg_level level) +{ + WARN_ON_ONCE(level == PG_LEVEL_NONE); + return level - 1; +} + +u64 tdh_vp_enter(struct tdx_vp *vp, struct tdx_module_args *args); +u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page); +u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2); +u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2); +u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page); +u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2); +u64 tdh_mem_range_block(struct tdx_td *td, u64 gpa, int level, u64 *ext_err1, u64 *ext_err2); +u64 tdh_mng_key_config(struct tdx_td *td); +u64 tdh_mng_create(struct tdx_td *td, u16 hkid); +u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp); +u64 tdh_mng_rd(struct tdx_td *td, u64 field, u64 *data); +u64 tdh_mr_extend(struct tdx_td *td, u64 gpa, u64 *ext_err1, u64 *ext_err2); +u64 tdh_mr_finalize(struct tdx_td *td); +u64 tdh_vp_flush(struct tdx_vp *vp); +u64 tdh_mng_vpflushdone(struct tdx_td *td); +u64 tdh_mng_key_freeid(struct tdx_td *td); +u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err); +u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid); +u64 tdh_vp_rd(struct tdx_vp *vp, u64 field, u64 *data); +u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask); +u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size); +u64 tdh_mem_track(struct tdx_td *tdr); +u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *ext_err1, u64 *ext_err2); +u64 tdh_phymem_cache_wb(bool resume); +u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td); +u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page); +#else +static inline void tdx_init(void) { } 
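As a usage sketch of the wrappers above: a caller fills the input registers of
struct tdx_module_args, issues the leaf through one of the seamcall*() macros
(which retry while the module reports TDX_RND_NO_ENTROPY), and reads any
outputs back from the args structure. The leaf number and helper below are
hypothetical; real callers such as tdh_mng_rd() follow the same pattern:

#define TDH_EXAMPLE_LEAF	99ULL		/* hypothetical leaf number */

static int tdx_example_read_field(u64 field, u64 *data)
{
	struct tdx_module_args args = {
		.rdx = field,			/* leaf-specific input */
	};
	u64 err;

	/* sc_retry() loops internally on TDX_RND_NO_ENTROPY */
	err = seamcall_ret(TDH_EXAMPLE_LEAF, &args);
	if (err != TDX_SUCCESS)
		return -EIO;

	/* __seamcall_ret() copied the output registers back into args */
	*data = args.r8;			/* leaf-specific output */
	return 0;
}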
+static inline int tdx_cpu_enable(void) { return -ENODEV; } +static inline int tdx_enable(void) { return -ENODEV; } +static inline u32 tdx_get_nr_guest_keyids(void) { return 0; } +static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; } +static inline const struct tdx_sys_info *tdx_get_sysinfo(void) { return NULL; } +#endif /* CONFIG_INTEL_TDX_HOST */ + +#ifdef CONFIG_KEXEC_CORE +void tdx_cpu_flush_cache_for_kexec(void); +#else +static inline void tdx_cpu_flush_cache_for_kexec(void) { } +#endif + +#endif /* !__ASSEMBLER__ */ +#endif /* _ASM_X86_TDX_H */ diff --git a/arch/x86/include/asm/tdx_global_metadata.h b/arch/x86/include/asm/tdx_global_metadata.h new file mode 100644 index 000000000000..060a2ad744bf --- /dev/null +++ b/arch/x86/include/asm/tdx_global_metadata.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Automatically generated TDX global metadata structures. */ +#ifndef _X86_VIRT_TDX_AUTO_GENERATED_TDX_GLOBAL_METADATA_H +#define _X86_VIRT_TDX_AUTO_GENERATED_TDX_GLOBAL_METADATA_H + +#include <linux/types.h> + +struct tdx_sys_info_features { + u64 tdx_features0; +}; + +struct tdx_sys_info_tdmr { + u16 max_tdmrs; + u16 max_reserved_per_tdmr; + u16 pamt_4k_entry_size; + u16 pamt_2m_entry_size; + u16 pamt_1g_entry_size; +}; + +struct tdx_sys_info_td_ctrl { + u16 tdr_base_size; + u16 tdcs_base_size; + u16 tdvps_base_size; +}; + +struct tdx_sys_info_td_conf { + u64 attributes_fixed0; + u64 attributes_fixed1; + u64 xfam_fixed0; + u64 xfam_fixed1; + u16 num_cpuid_config; + u16 max_vcpus_per_td; + u64 cpuid_config_leaves[128]; + u64 cpuid_config_values[128][2]; +}; + +struct tdx_sys_info { + struct tdx_sys_info_features features; + struct tdx_sys_info_tdmr tdmr; + struct tdx_sys_info_td_ctrl td_ctrl; + struct tdx_sys_info_td_conf td_conf; +}; + +#endif diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index e85ff65c43c3..f2d142a0a862 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -6,19 +6,16 @@ #include <linux/stddef.h> #include <asm/ptrace.h> -struct paravirt_patch_site; -#ifdef CONFIG_PARAVIRT -void apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end); -#else -static inline void apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end) -{} -#define __parainstructions NULL -#define __parainstructions_end NULL -#endif - -extern void *text_poke_early(void *addr, const void *opcode, size_t len); +/* + * Currently, the max observed size in the kernel code is + * JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, which are 5. + * Raise it if needed. + */ +#define TEXT_POKE_MAX_OPCODE_SIZE 5 + +extern void text_poke_early(void *addr, const void *opcode, size_t len); + +extern void text_poke_apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len); /* * Clear and restore the kernel write-protection flag on the local CPU. @@ -31,12 +28,191 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len); * no thread can be preempted in the instructions being modified (no iret to an * invalid instruction possible) or if the instructions are changed from a * consistent state to another consistent state atomically. - * On the local CPU you need to be protected again NMI or MCE handlers seeing an - * inconsistent instruction while you patch. + * On the local CPU you need to be protected against NMI or MCE handlers seeing + * an inconsistent instruction while you patch. 
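+ *
+ * Callers of text_poke() must hold text_mutex; text_poke_kgdb() is the
+ * exception, relying on kgdb having stopped all other CPUs instead.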
*/ extern void *text_poke(void *addr, const void *opcode, size_t len); -extern int poke_int3_handler(struct pt_regs *regs); -extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); +extern void smp_text_poke_sync_each_cpu(void); +extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); +extern void *text_poke_copy(void *addr, const void *opcode, size_t len); +#define text_poke_copy text_poke_copy +extern void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, bool core_ok); +extern void *text_poke_set(void *addr, int c, size_t len); +extern int smp_text_poke_int3_handler(struct pt_regs *regs); +extern void smp_text_poke_single(void *addr, const void *opcode, size_t len, const void *emulate); + +extern void smp_text_poke_batch_add(void *addr, const void *opcode, size_t len, const void *emulate); +extern void smp_text_poke_batch_finish(void); + +#define INT3_INSN_SIZE 1 +#define INT3_INSN_OPCODE 0xCC + +#define RET_INSN_SIZE 1 +#define RET_INSN_OPCODE 0xC3 + +#define CALL_INSN_SIZE 5 +#define CALL_INSN_OPCODE 0xE8 + +#define JMP32_INSN_SIZE 5 +#define JMP32_INSN_OPCODE 0xE9 + +#define JMP8_INSN_SIZE 2 +#define JMP8_INSN_OPCODE 0xEB + +#define DISP32_SIZE 4 + +static __always_inline int text_opcode_size(u8 opcode) +{ + int size = 0; + +#define __CASE(insn) \ + case insn##_INSN_OPCODE: size = insn##_INSN_SIZE; break + + switch(opcode) { + __CASE(INT3); + __CASE(RET); + __CASE(CALL); + __CASE(JMP32); + __CASE(JMP8); + } + +#undef __CASE + + return size; +} + +union text_poke_insn { + u8 text[TEXT_POKE_MAX_OPCODE_SIZE]; + struct { + u8 opcode; + s32 disp; + } __attribute__((packed)); +}; + +static __always_inline +void __text_gen_insn(void *buf, u8 opcode, const void *addr, const void *dest, int size) +{ + union text_poke_insn *insn = buf; + + BUG_ON(size < text_opcode_size(opcode)); + + /* + * Hide the addresses to avoid the compiler folding in constants when + * referencing code, these can mess up annotations like + * ANNOTATE_NOENDBR. + */ + OPTIMIZER_HIDE_VAR(insn); + OPTIMIZER_HIDE_VAR(addr); + OPTIMIZER_HIDE_VAR(dest); + + insn->opcode = opcode; + + if (size > 1) { + insn->disp = (long)dest - (long)(addr + size); + if (size == 2) { + /* + * Ensure that for JMP8 the displacement + * actually fits the signed byte. + */ + BUG_ON((insn->disp >> 31) != (insn->disp >> 7)); + } + } +} + +static __always_inline +void *text_gen_insn(u8 opcode, const void *addr, const void *dest) +{ + static union text_poke_insn insn; /* per instance */ + __text_gen_insn(&insn, opcode, addr, dest, text_opcode_size(opcode)); + return &insn.text; +} + extern int after_bootmem; +extern __ro_after_init struct mm_struct *text_poke_mm; +extern __ro_after_init unsigned long text_poke_mm_addr; + +#ifndef CONFIG_UML_X86 +static __always_inline +void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) +{ + regs->ip = ip; +} + +static __always_inline +void int3_emulate_push(struct pt_regs *regs, unsigned long val) +{ + /* + * The INT3 handler in entry_64.S adds a gap between the + * stack where the break point happened, and the saving of + * pt_regs. We can extend the original stack because of + * this gap. See the idtentry macro's X86_TRAP_BP logic. + * + * Similarly, entry_32.S will have a gap on the stack for + * (any) hardware exception and pt_regs; see the + * FIXUP_FRAME macro. 
+ */
+	regs->sp -= sizeof(unsigned long);
+	*(unsigned long *)regs->sp = val;
+}
+
+static __always_inline
+unsigned long int3_emulate_pop(struct pt_regs *regs)
+{
+	unsigned long val = *(unsigned long *)regs->sp;
+	regs->sp += sizeof(unsigned long);
+	return val;
+}
+
+static __always_inline
+void int3_emulate_call(struct pt_regs *regs, unsigned long func)
+{
+	int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
+	int3_emulate_jmp(regs, func);
+}
+
+static __always_inline
+void int3_emulate_ret(struct pt_regs *regs)
+{
+	unsigned long ip = int3_emulate_pop(regs);
+	int3_emulate_jmp(regs, ip);
+}
+
+static __always_inline
+bool __emulate_cc(unsigned long flags, u8 cc)
+{
+	static const unsigned long cc_mask[6] = {
+		[0] = X86_EFLAGS_OF,
+		[1] = X86_EFLAGS_CF,
+		[2] = X86_EFLAGS_ZF,
+		[3] = X86_EFLAGS_CF | X86_EFLAGS_ZF,
+		[4] = X86_EFLAGS_SF,
+		[5] = X86_EFLAGS_PF,
+	};
+
+	bool invert = cc & 1;
+	bool match;
+
+	if (cc < 0xc) {
+		match = flags & cc_mask[cc >> 1];
+	} else {
+		match = ((flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
+			((flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
+		if (cc >= 0xe)
+			match = match || (flags & X86_EFLAGS_ZF);
+	}
+
+	return (match && !invert) || (!match && invert);
+}
+
+static __always_inline
+void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp)
+{
+	if (__emulate_cc(regs->flags, cc))
+		ip += disp;
+
+	int3_emulate_jmp(regs, ip);
+}
+
+#endif /* !CONFIG_UML_X86 */
 #endif /* _ASM_X86_TEXT_PATCHING_H */
diff --git a/arch/x86/include/asm/thermal.h b/arch/x86/include/asm/thermal.h
new file mode 100644
index 000000000000..91a7b6687c3b
--- /dev/null
+++ b/arch/x86/include/asm/thermal.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_THERMAL_H
+#define _ASM_X86_THERMAL_H
+
+#ifdef CONFIG_X86_THERMAL_VECTOR
+void therm_lvt_init(void);
+void intel_init_thermal(struct cpuinfo_x86 *c);
+bool x86_thermal_enabled(void);
+void intel_thermal_interrupt(void);
+#else
+static inline void therm_lvt_init(void) { }
+static inline void intel_init_thermal(struct cpuinfo_x86 *c) { }
+#endif
+
+#endif /* _ASM_X86_THERMAL_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e0eccbcb8447..e71e0e8362ed 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -31,7 +31,9 @@
 * In vm86 mode, the hardware frame is much longer still, so add 16
 * bytes to make room for the real-mode segments.
 *
- * x86_64 has a fixed-length stack frame.
+ * x86-64 also has a fixed-length stack frame, but its length depends on
+ * whether FRED is enabled. Future versions of FRED might make this
+ * dynamic, but for now a FRED frame is always 2 words longer.
*/ #ifdef CONFIG_X86_32 # ifdef CONFIG_VM86 @@ -39,8 +41,12 @@ # else # define TOP_OF_KERNEL_STACK_PADDING 8 # endif -#else -# define TOP_OF_KERNEL_STACK_PADDING 0 +#else /* x86-64 */ +# ifdef CONFIG_X86_FRED +# define TOP_OF_KERNEL_STACK_PADDING (2 * 8) +# else +# define TOP_OF_KERNEL_STACK_PADDING 0 +# endif #endif /* @@ -48,14 +54,18 @@ * - this struct should fit entirely inside of one cache line * - this struct shares the supervisor stack pages */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct task_struct; #include <asm/cpufeature.h> #include <linux/atomic.h> struct thread_info { unsigned long flags; /* low level flags */ + unsigned long syscall_work; /* SYSCALL_WORK_ flags */ u32 status; /* thread synchronous flags */ +#ifdef CONFIG_SMP + u32 cpu; /* current CPU */ +#endif }; #define INIT_THREAD_INFO(tsk) \ @@ -63,86 +73,53 @@ struct thread_info { .flags = 0, \ } -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ #include <asm/asm-offsets.h> #endif /* - * thread information flags - * - these are process state flags that various assembly files - * may need to access + * Tell the generic TIF infrastructure which bits x86 supports */ -#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ -#define TIF_SIGPENDING 2 /* signal pending */ -#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ -#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_SSBD 5 /* Speculative store bypass disable */ -#define TIF_SYSCALL_EMU 6 /* syscall emulation active */ -#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ -#define TIF_SECCOMP 8 /* secure computing */ -#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ -#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */ -#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ -#define TIF_UPROBE 12 /* breakpointed or singlestepping */ -#define TIF_PATCH_PENDING 13 /* pending live patching update */ -#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */ -#define TIF_NOTSC 16 /* TSC is not accessible in userland */ -#define TIF_IA32 17 /* IA32 compatibility process */ -#define TIF_NOHZ 19 /* in adaptive nohz mode */ -#define TIF_MEMDIE 20 /* is terminating due to OOM killer */ -#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ +#define HAVE_TIF_NEED_RESCHED_LAZY +#define HAVE_TIF_POLLING_NRFLAG +#define HAVE_TIF_SINGLESTEP + +#include <asm-generic/thread_info_tif.h> + +/* Architecture specific TIF space starts at 16 */ +#define TIF_SSBD 16 /* Speculative store bypass disable */ +#define TIF_SPEC_IB 17 /* Indirect branch speculation mitigation */ +#define TIF_SPEC_L1D_FLUSH 18 /* Flush L1D on mm switches (processes) */ +#define TIF_NEED_FPU_LOAD 19 /* load FPU on return to userspace */ +#define TIF_NOCPUID 20 /* CPUID is not accessible in userland */ +#define TIF_NOTSC 21 /* TSC is not accessible in userland */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ +#define TIF_SPEC_FORCE_UPDATE 23 /* Force speculation MSR update in context switch */ #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ -#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ +#define TIF_SINGLESTEP 25 /* reenable singlestep on user return*/ +#define TIF_BLOCKSTEP 26 /* set when we want DEBUGCTLMSR_BTF */ #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ -#define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ -#define 
TIF_ADDR32 29 /* 32-bit address space on 64 bits */ -#define TIF_X32 30 /* 32-bit native x86-64 binary */ -#define TIF_FSCHECK 31 /* Check FS is USER_DS on return */ - -#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) -#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) -#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_SSBD (1 << TIF_SSBD) -#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) -#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -#define _TIF_SECCOMP (1 << TIF_SECCOMP) -#define _TIF_SPEC_IB (1 << TIF_SPEC_IB) -#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) -#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) -#define _TIF_UPROBE (1 << TIF_UPROBE) -#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) -#define _TIF_NOCPUID (1 << TIF_NOCPUID) -#define _TIF_NOTSC (1 << TIF_NOTSC) -#define _TIF_IA32 (1 << TIF_IA32) -#define _TIF_NOHZ (1 << TIF_NOHZ) -#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) -#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) -#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) -#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) -#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) -#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) -#define _TIF_ADDR32 (1 << TIF_ADDR32) -#define _TIF_X32 (1 << TIF_X32) -#define _TIF_FSCHECK (1 << TIF_FSCHECK) - -/* - * work to do in syscall_trace_enter(). Also includes TIF_NOHZ for - * enter_from_user_mode() - */ -#define _TIF_WORK_SYSCALL_ENTRY \ - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ - _TIF_NOHZ) +#define TIF_ADDR32 28 /* 32-bit address space on 64 bits */ + +#define _TIF_SSBD BIT(TIF_SSBD) +#define _TIF_SPEC_IB BIT(TIF_SPEC_IB) +#define _TIF_SPEC_L1D_FLUSH BIT(TIF_SPEC_L1D_FLUSH) +#define _TIF_NEED_FPU_LOAD BIT(TIF_NEED_FPU_LOAD) +#define _TIF_NOCPUID BIT(TIF_NOCPUID) +#define _TIF_NOTSC BIT(TIF_NOTSC) +#define _TIF_IO_BITMAP BIT(TIF_IO_BITMAP) +#define _TIF_SPEC_FORCE_UPDATE BIT(TIF_SPEC_FORCE_UPDATE) +#define _TIF_FORCED_TF BIT(TIF_FORCED_TF) +#define _TIF_BLOCKSTEP BIT(TIF_BLOCKSTEP) +#define _TIF_SINGLESTEP BIT(TIF_SINGLESTEP) +#define _TIF_LAZY_MMU_UPDATES BIT(TIF_LAZY_MMU_UPDATES) +#define _TIF_ADDR32 BIT(TIF_ADDR32) /* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW_BASE \ - (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ +#define _TIF_WORK_CTXSW_BASE \ + (_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \ _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE) /* @@ -154,8 +131,14 @@ struct thread_info { # define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE) #endif -#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) -#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) +#ifdef CONFIG_X86_IOPL_IOPERM +# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY | \ + _TIF_IO_BITMAP) +#else +# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY) +#endif + +#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) #define STACK_WARN (THREAD_SIZE/8) @@ -164,7 +147,7 @@ struct thread_info { * * preempt_count needs to be 1 initially, until the scheduler is functional. 
*/ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Walks up the stack frames to make sure that the specified object is @@ -174,7 +157,12 @@ struct thread_info { * GOOD_FRAME if within a frame * BAD_STACK if placed across a frame boundary (or outside stack) * NOT_STACK unable to determine (no frame pointers, etc) + * + * This function reads pointers from the stack and dereferences them. The + * pointers may not have their KMSAN shadow set up properly, which may result + * in false positive reports. Disable instrumentation to avoid those. */ +__no_kmsan_checks static inline int arch_within_stack_frames(const void * const stack, const void * const stackend, const void *obj, unsigned long len) @@ -211,18 +199,25 @@ static inline int arch_within_stack_frames(const void * const stack, #endif } -#else /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ -#ifdef CONFIG_X86_64 -# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1) -#endif - -#endif +/* + * Thread-synchronous status. + * + * This is different from the flags in that nobody else + * ever touches our thread-synchronous status, so we don't + * have to worry about atomic accesses. + */ +#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ +#ifndef __ASSEMBLER__ #ifdef CONFIG_COMPAT #define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ + +#define arch_set_restart_data(restart) \ + do { restart->arch_data = current_thread_info()->status; } while (0) + #endif -#ifndef __ASSEMBLY__ #ifdef CONFIG_X86_32 #define in_ia32_syscall() true @@ -231,20 +226,8 @@ static inline int arch_within_stack_frames(const void * const stack, current_thread_info()->status & TS_COMPAT) #endif -/* - * Force syscall return via IRET by making it look as if there was - * some work pending. IRET is our most capable (but slowest) syscall - * return path, which is able to restore modified SS, CS and certain - * EFLAGS values that other (fast) syscall return instructions - * are not able to restore properly. 
- */ -#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME) - -extern void arch_task_cache_init(void); -extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); -extern void arch_release_task_struct(struct task_struct *tsk); extern void arch_setup_new_exec(void); #define arch_setup_new_exec arch_setup_new_exec -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_THREAD_INFO_H */ diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h index cef818b16045..f360104ed172 100644 --- a/arch/x86/include/asm/time.h +++ b/arch/x86/include/asm/time.h @@ -6,7 +6,8 @@ #include <asm/mc146818rtc.h> extern void hpet_time_init(void); -extern void time_init(void); +extern bool pit_timer_init(void); +extern bool tsc_clocksource_watchdog_disabled(void); extern struct clock_event_device *global_clock_event; diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 7365dd4acffb..23baf8c9b34c 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -6,8 +6,6 @@ #include <linux/interrupt.h> #include <linux/math64.h> -#define TICK_SIZE (tick_nsec / 1000) - unsigned long long native_sched_clock(void); extern void recalibrate_cpu_khz(void); diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h index a4a8b1b16c0c..956e4145311b 100644 --- a/arch/x86/include/asm/timex.h +++ b/arch/x86/include/asm/timex.h @@ -5,6 +5,15 @@ #include <asm/processor.h> #include <asm/tsc.h> +static inline unsigned long random_get_entropy(void) +{ + if (!IS_ENABLED(CONFIG_X86_TSC) && + !cpu_feature_enabled(X86_FEATURE_TSC)) + return random_get_entropy_fallback(); + return rdtsc(); +} +#define random_get_entropy random_get_entropy + /* Assume we use the PIT time source for the clock tick */ #define CLOCK_TICK_RATE PIT_TICK_RATE diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index 404b8b1d44f5..866ea78ba156 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -2,13 +2,13 @@ #ifndef _ASM_X86_TLB_H #define _ASM_X86_TLB_H -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) - +#define tlb_flush tlb_flush static inline void tlb_flush(struct mmu_gather *tlb); #include <asm-generic/tlb.h> +#include <linux/kernel.h> +#include <vdso/bits.h> +#include <vdso/page.h> static inline void tlb_flush(struct mmu_gather *tlb) { @@ -23,18 +23,144 @@ static inline void tlb_flush(struct mmu_gather *tlb) flush_tlb_mm_range(tlb->mm, start, end, stride_shift, tlb->freed_tables); } +static inline void invlpg(unsigned long addr) +{ + asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); +} + +enum addr_stride { + PTE_STRIDE = 0, + PMD_STRIDE = 1 +}; + +/* + * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination + * of the three. For example: + * - FLAG_VA | FLAG_INCLUDE_GLOBAL: invalidate all TLB entries at the address + * - FLAG_PCID: invalidate all TLB entries matching the PCID + * + * The first is used to invalidate (kernel) mappings at a particular + * address across all processes. + * + * The latter invalidates all TLB entries matching a PCID. 
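+ *
+ * The two can also be combined: FLAG_VA | FLAG_PCID invalidates one
+ * address in one PCID, which is the combination used further below for
+ * flushing ranges of user addresses.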
+ */
+#define INVLPGB_FLAG_VA			BIT(0)
+#define INVLPGB_FLAG_PCID		BIT(1)
+#define INVLPGB_FLAG_ASID		BIT(2)
+#define INVLPGB_FLAG_INCLUDE_GLOBAL	BIT(3)
+#define INVLPGB_FLAG_FINAL_ONLY		BIT(4)
+#define INVLPGB_FLAG_INCLUDE_NESTED	BIT(5)
+
+/* The implied mode when all bits are clear: */
+#define INVLPGB_MODE_ALL_NONGLOBALS	0UL
+
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
 /*
- * While x86 architecture in general requires an IPI to perform TLB
- * shootdown, enablement code for several hypervisors overrides
- * .flush_tlb_others hook in pv_mmu_ops and implements it by issuing
- * a hypercall. To keep software pagetable walkers safe in this case we
- * switch to RCU based table free (HAVE_RCU_TABLE_FREE). See the comment
- * below 'ifdef CONFIG_HAVE_RCU_TABLE_FREE' in include/asm-generic/tlb.h
- * for more details.
+ * INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
+ *
+ * The INVLPGB instruction is weakly ordered, and a batch of invalidations can
+ * be done in a parallel fashion.
+ *
+ * The instruction takes the number of extra pages to invalidate, beyond the
+ * first page, while __invlpgb gets the more human-readable number of pages to
+ * invalidate.
+ *
+ * The bits in rax[0:2] determine respectively which components of the address
+ * (VA, PCID, ASID) get compared when flushing. If none of these bits are set,
+ * *any* address in the specified range matches.
+ *
+ * Since it is desired to only flush TLB entries for the ASID that is executing
+ * the instruction (a host/hypervisor or a guest), the ASID valid bit should
+ * always be set. On a host/hypervisor, the hardware will use the ASID value
+ * specified in EDX[15:0] (which should be 0). On a guest, the hardware will
+ * use the actual ASID value of the guest.
+ *
+ * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from
+ * this CPU have completed.
 */
-static inline void __tlb_remove_table(void *table)
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+			     unsigned long addr, u16 nr_pages,
+			     enum addr_stride stride, u8 flags)
 {
-	free_page_and_swap_cache(table);
+	u64 rax = addr | flags | INVLPGB_FLAG_ASID;
+	u32 ecx = (stride << 31) | (nr_pages - 1);
+	u32 edx = (pcid << 16) | asid;
+
+	/* The low bits in rax are for flags. Verify addr is clean. */
+	VM_WARN_ON_ONCE(addr & ~PAGE_MASK);
+
+	/* INVLPGB; supported in binutils >= 2.36. */
+	asm volatile(".byte 0x0f, 0x01, 0xfe" :: "a" (rax), "c" (ecx), "d" (edx));
+}
+
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags)
+{
+	__invlpgb(asid, pcid, 0, 1, 0, flags);
 }

+static inline void __tlbsync(void)
+{
+	/*
+	 * TLBSYNC waits for INVLPGB instructions originating on the same CPU
+	 * to have completed. Print a warning if the task has been migrated,
+	 * and might not be waiting on all the INVLPGBs issued during this TLB
+	 * invalidation sequence.
+	 */
+	cant_migrate();
+
+	/* TLBSYNC; supported in binutils >= 2.36. */
+	asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
+}
+#else
+/* Some compilers (I'm looking at you clang!) simply can't do DCE */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+			     unsigned long addr, u16 nr_pages,
+			     enum addr_stride s, u8 flags) { }
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags) { }
+static inline void __tlbsync(void) { }
+#endif
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+						unsigned long addr,
+						u16 nr, bool stride)
+{
+	enum addr_stride str = stride ?
PMD_STRIDE : PTE_STRIDE; + u8 flags = INVLPGB_FLAG_PCID | INVLPGB_FLAG_VA; + + __invlpgb(0, pcid, addr, nr, str, flags); +} + +/* Flush all mappings for a given PCID, not including globals. */ +static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid) +{ + __invlpgb_all(0, pcid, INVLPGB_FLAG_PCID); +} + +/* Flush all mappings, including globals, for all PCIDs. */ +static inline void invlpgb_flush_all(void) +{ + /* + * TLBSYNC at the end needs to make sure all flushes done on the + * current CPU have been executed system-wide. Therefore, make + * sure nothing gets migrated in-between but disable preemption + * as it is cheaper. + */ + guard(preempt)(); + __invlpgb_all(0, 0, INVLPGB_FLAG_INCLUDE_GLOBAL); + __tlbsync(); +} + +/* Flush addr, including globals, for all PCIDs. */ +static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr) +{ + __invlpgb(0, 0, addr, nr, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL); +} + +/* Flush all mappings for all PCIDs except globals. */ +static inline void invlpgb_flush_all_nonglobals(void) +{ + guard(preempt)(); + __invlpgb_all(0, 0, INVLPGB_MODE_ALL_NONGLOBALS); + __tlbsync(); +} #endif /* _ASM_X86_TLB_H */ diff --git a/arch/x86/include/asm/tlbbatch.h b/arch/x86/include/asm/tlbbatch.h index 1ad56eb3e8a8..80aaf64ff25f 100644 --- a/arch/x86/include/asm/tlbbatch.h +++ b/arch/x86/include/asm/tlbbatch.h @@ -10,6 +10,11 @@ struct arch_tlbflush_unmap_batch { * the PFNs being flushed.. */ struct cpumask cpumask; + /* + * Set if pages were unmapped from any MM, even one that does not + * have active CPUs in its cpumask. + */ + bool unmapped_pages; }; #endif /* _ARCH_X86_TLBBATCH_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index f4204bf377fc..00daedfefc1b 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -2,9 +2,11 @@ #ifndef _ASM_X86_TLBFLUSH_H #define _ASM_X86_TLBFLUSH_H -#include <linux/mm.h> +#include <linux/mm_types.h> +#include <linux/mmu_notifier.h> #include <linux/sched.h> +#include <asm/barrier.h> #include <asm/processor.h> #include <asm/cpufeature.h> #include <asm/special_insns.h> @@ -12,141 +14,56 @@ #include <asm/invpcid.h> #include <asm/pti.h> #include <asm/processor-flags.h> +#include <asm/pgtable.h> -/* - * The x86 feature is called PCID (Process Context IDentifier). It is similar - * to what is traditionally called ASID on the RISC processors. - * - * We don't use the traditional ASID implementation, where each process/mm gets - * its own ASID and flush/restart when we run out of ASID space. - * - * Instead we have a small per-cpu array of ASIDs and cache the last few mm's - * that came by on this CPU, allowing cheaper switch_mm between processes on - * this CPU. - * - * We end up with different spaces for different things. To avoid confusion we - * use different names for each of them: - * - * ASID - [0, TLB_NR_DYN_ASIDS-1] - * the canonical identifier for an mm - * - * kPCID - [1, TLB_NR_DYN_ASIDS] - * the value we write into the PCID part of CR3; corresponds to the - * ASID+1, because PCID 0 is special. - * - * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS] - * for KPTI each mm has two address spaces and thus needs two - * PCID values, but we can still do with a single ASID denomination - * for each mm. Corresponds to kPCID + 2048. 
- * - */ +DECLARE_PER_CPU(u64, tlbstate_untag_mask); -/* There are 12 bits of space for ASIDS in CR3 */ -#define CR3_HW_ASID_BITS 12 +void __flush_tlb_all(void); -/* - * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for - * user/kernel switches - */ -#ifdef CONFIG_PAGE_TABLE_ISOLATION -# define PTI_CONSUMED_PCID_BITS 1 -#else -# define PTI_CONSUMED_PCID_BITS 0 -#endif +#define TLB_FLUSH_ALL -1UL +#define TLB_GENERATION_INVALID 0 -#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS) +void cr4_update_irqsoff(unsigned long set, unsigned long clear); +unsigned long cr4_read_shadow(void); -/* - * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account - * for them being zero-based. Another -1 is because PCID 0 is reserved for - * use by non-PCID-aware users. - */ -#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2) - -/* - * 6 because 6 should be plenty and struct tlb_state will fit in two cache - * lines. - */ -#define TLB_NR_DYN_ASIDS 6 - -/* - * Given @asid, compute kPCID - */ -static inline u16 kern_pcid(u16 asid) +/* Set in this cpu's CR4. */ +static inline void cr4_set_bits_irqsoff(unsigned long mask) { - VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); - -#ifdef CONFIG_PAGE_TABLE_ISOLATION - /* - * Make sure that the dynamic ASID space does not confict with the - * bit we are using to switch between user and kernel ASIDs. - */ - BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT)); - - /* - * The ASID being passed in here should have respected the - * MAX_ASID_AVAILABLE and thus never have the switch bit set. - */ - VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT)); -#endif - /* - * The dynamically-assigned ASIDs that get passed in are small - * (<TLB_NR_DYN_ASIDS). They never have the high switch bit set, - * so do not bother to clear it. - * - * If PCID is on, ASID-aware code paths put the ASID+1 into the - * PCID bits. This serves two purposes. It prevents a nasty - * situation in which PCID-unaware code saves CR3, loads some other - * value (with PCID == 0), and then restores CR3, thus corrupting - * the TLB for ASID 0 if the saved ASID was nonzero. It also means - * that any bugs involving loading a PCID-enabled CR3 with - * CR4.PCIDE off will trigger deterministically. - */ - return asid + 1; + cr4_update_irqsoff(mask, 0); } -/* - * Given @asid, compute uPCID - */ -static inline u16 user_pcid(u16 asid) +/* Clear in this cpu's CR4. */ +static inline void cr4_clear_bits_irqsoff(unsigned long mask) { - u16 ret = kern_pcid(asid); -#ifdef CONFIG_PAGE_TABLE_ISOLATION - ret |= 1 << X86_CR3_PTI_PCID_USER_BIT; -#endif - return ret; + cr4_update_irqsoff(0, mask); } -struct pgd_t; -static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) +/* Set in this cpu's CR4. */ +static inline void cr4_set_bits(unsigned long mask) { - if (static_cpu_has(X86_FEATURE_PCID)) { - return __sme_pa(pgd) | kern_pcid(asid); - } else { - VM_WARN_ON_ONCE(asid != 0); - return __sme_pa(pgd); - } + unsigned long flags; + + local_irq_save(flags); + cr4_set_bits_irqsoff(mask); + local_irq_restore(flags); } -static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) +/* Clear in this cpu's CR4. */ +static inline void cr4_clear_bits(unsigned long mask) { - VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); - /* - * Use boot_cpu_has() instead of this_cpu_has() as this function - * might be called during early boot. 
This should work even after - * boot because all CPU's the have same capabilities: - */ - VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID)); - return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH; + unsigned long flags; + + local_irq_save(flags); + cr4_clear_bits_irqsoff(mask); + local_irq_restore(flags); } -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#else -#define __flush_tlb() __native_flush_tlb() -#define __flush_tlb_global() __native_flush_tlb_global() -#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr) -#endif +#ifndef MODULE +/* + * 6 because 6 should be plenty and struct tlb_state will fit in two cache + * lines. + */ +#define TLB_NR_DYN_ASIDS 6 struct tlb_context { u64 ctx_id; @@ -167,35 +84,18 @@ struct tlb_state { */ struct mm_struct *loaded_mm; -#define LOADED_MM_SWITCHING ((struct mm_struct *)1) +#define LOADED_MM_SWITCHING ((struct mm_struct *)1UL) /* Last user mm for optimizing IBPB */ union { struct mm_struct *last_user_mm; - unsigned long last_user_mm_ibpb; + unsigned long last_user_mm_spec; }; u16 loaded_mm_asid; u16 next_asid; /* - * We can be in one of several states: - * - * - Actively using an mm. Our CPU's bit will be set in - * mm_cpumask(loaded_mm) and is_lazy == false; - * - * - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit - * will not be set in mm_cpumask(&init_mm) and is_lazy == false. - * - * - Lazily using a real mm. loaded_mm != &init_mm, our bit - * is set in mm_cpumask(loaded_mm), but is_lazy == true. - * We're heuristically guessing that the CR3 load we - * skipped more than makes up for the overhead added by - * lazy mode. - */ - bool is_lazy; - - /* * If set we changed the page tables in such a way that we * needed an invalidation of all contexts (aka. PCIDs / ASIDs). * This tells us to go invalidate all the non-loaded ctxs[] @@ -206,6 +106,16 @@ struct tlb_state { */ bool invalidate_other; +#ifdef CONFIG_ADDRESS_MASKING + /* + * Active LAM mode. + * + * X86_CR3_LAM_U57/U48 shifted right by X86_CR3_LAM_U57_BIT or 0 if LAM + * disabled. + */ + u8 lam; +#endif + /* * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate * the corresponding user PCID needs a flush next time we @@ -240,39 +150,30 @@ struct tlb_state { */ struct tlb_context ctxs[TLB_NR_DYN_ASIDS]; }; -DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); - -/* - * Blindly accessing user memory from NMI context can be dangerous - * if we're in the middle of switching the current user task or - * switching the loaded mm. It can also be dangerous if we - * interrupted some kernel code that was temporarily using a - * different mm. - */ -static inline bool nmi_uaccess_okay(void) -{ - struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); - struct mm_struct *current_mm = current->mm; - - VM_WARN_ON_ONCE(!loaded_mm); +DECLARE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate); +struct tlb_state_shared { /* - * The condition we want to check is - * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though, - * if we're running in a VM with shadow paging, and nmi_uaccess_okay() - * is supposed to be reasonably fast. + * We can be in one of several states: + * + * - Actively using an mm. Our CPU's bit will be set in + * mm_cpumask(loaded_mm) and is_lazy == false; * - * Instead, we check the almost equivalent but somewhat conservative - * condition below, and we rely on the fact that switch_mm_irqs_off() - * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3. + * - Not using a real mm. loaded_mm == &init_mm. 
Our CPU's bit + * will not be set in mm_cpumask(&init_mm) and is_lazy == false. + * + * - Lazily using a real mm. loaded_mm != &init_mm, our bit + * is set in mm_cpumask(loaded_mm), but is_lazy == true. + * We're heuristically guessing that the CR3 load we + * skipped more than makes up for the overhead added by + * lazy mode. */ - if (loaded_mm != current_mm) - return false; + bool is_lazy; +}; +DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared); - VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa())); - - return true; -} +bool nmi_uaccess_okay(void); +#define nmi_uaccess_okay nmi_uaccess_okay /* Initialize cr4 shadow for this CPU. */ static inline void cr4_init_shadow(void) @@ -280,234 +181,15 @@ static inline void cr4_init_shadow(void) this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); } -static inline void __cr4_set(unsigned long cr4) -{ - lockdep_assert_irqs_disabled(); - this_cpu_write(cpu_tlbstate.cr4, cr4); - __write_cr4(cr4); -} - -/* Set in this cpu's CR4. */ -static inline void cr4_set_bits(unsigned long mask) -{ - unsigned long cr4, flags; - - local_irq_save(flags); - cr4 = this_cpu_read(cpu_tlbstate.cr4); - if ((cr4 | mask) != cr4) - __cr4_set(cr4 | mask); - local_irq_restore(flags); -} - -/* Clear in this cpu's CR4. */ -static inline void cr4_clear_bits(unsigned long mask) -{ - unsigned long cr4, flags; - - local_irq_save(flags); - cr4 = this_cpu_read(cpu_tlbstate.cr4); - if ((cr4 & ~mask) != cr4) - __cr4_set(cr4 & ~mask); - local_irq_restore(flags); -} - -static inline void cr4_toggle_bits_irqsoff(unsigned long mask) -{ - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); - __cr4_set(cr4 ^ mask); -} - -/* Read the CR4 shadow. */ -static inline unsigned long cr4_read_shadow(void) -{ - return this_cpu_read(cpu_tlbstate.cr4); -} - -/* - * Mark all other ASIDs as invalid, preserves the current. - */ -static inline void invalidate_other_asid(void) -{ - this_cpu_write(cpu_tlbstate.invalidate_other, true); -} - -/* - * Save some of cr4 feature set we're using (e.g. Pentium 4MB - * enable and PPro Global page enable), so that any CPU's that boot - * up after us can get the correct flags. This should only be used - * during boot on the boot cpu. - */ extern unsigned long mmu_cr4_features; extern u32 *trampoline_cr4_features; -static inline void cr4_set_bits_and_update_boot(unsigned long mask) -{ - mmu_cr4_features |= mask; - if (trampoline_cr4_features) - *trampoline_cr4_features = mmu_cr4_features; - cr4_set_bits(mask); -} +/* How many pages can be invalidated with one INVLPGB. */ +extern u16 invlpgb_count_max; extern void initialize_tlbstate_and_flush(void); /* - * Given an ASID, flush the corresponding user ASID. We can delay this - * until the next time we switch to it. - * - * See SWITCH_TO_USER_CR3. - */ -static inline void invalidate_user_asid(u16 asid) -{ - /* There is no user ASID if address space separation is off */ - if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) - return; - - /* - * We only have a single ASID if PCID is off and the CR3 - * write will have flushed it. 
- */ - if (!cpu_feature_enabled(X86_FEATURE_PCID)) - return; - - if (!static_cpu_has(X86_FEATURE_PTI)) - return; - - __set_bit(kern_pcid(asid), - (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask)); -} - -/* - * flush the entire current user mapping - */ -static inline void __native_flush_tlb(void) -{ - /* - * Preemption or interrupts must be disabled to protect the access - * to the per CPU variable and to prevent being preempted between - * read_cr3() and write_cr3(). - */ - WARN_ON_ONCE(preemptible()); - - invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid)); - - /* If current->mm == NULL then the read_cr3() "borrows" an mm */ - native_write_cr3(__native_read_cr3()); -} - -/* - * flush everything - */ -static inline void __native_flush_tlb_global(void) -{ - unsigned long cr4, flags; - - if (static_cpu_has(X86_FEATURE_INVPCID)) { - /* - * Using INVPCID is considerably faster than a pair of writes - * to CR4 sandwiched inside an IRQ flag save/restore. - * - * Note, this works with CR4.PCIDE=0 or 1. - */ - invpcid_flush_all(); - return; - } - - /* - * Read-modify-write to CR4 - protect it from preemption and - * from interrupts. (Use the raw variant because this code can - * be called from deep inside debugging code.) - */ - raw_local_irq_save(flags); - - cr4 = this_cpu_read(cpu_tlbstate.cr4); - /* toggle PGE */ - native_write_cr4(cr4 ^ X86_CR4_PGE); - /* write old PGE again and flush TLBs */ - native_write_cr4(cr4); - - raw_local_irq_restore(flags); -} - -/* - * flush one page in the user mapping - */ -static inline void __native_flush_tlb_one_user(unsigned long addr) -{ - u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); - - asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); - - if (!static_cpu_has(X86_FEATURE_PTI)) - return; - - /* - * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1. - * Just use invalidate_user_asid() in case we are called early. - */ - if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) - invalidate_user_asid(loaded_mm_asid); - else - invpcid_flush_one(user_pcid(loaded_mm_asid), addr); -} - -/* - * flush everything - */ -static inline void __flush_tlb_all(void) -{ - /* - * This is to catch users with enabled preemption and the PGE feature - * and don't trigger the warning in __native_flush_tlb(). - */ - VM_WARN_ON_ONCE(preemptible()); - - if (boot_cpu_has(X86_FEATURE_PGE)) { - __flush_tlb_global(); - } else { - /* - * !PGE -> !PCID (setup_pcid()), thus every flush is total. - */ - __flush_tlb(); - } -} - -/* - * flush one page in the kernel mapping - */ -static inline void __flush_tlb_one_kernel(unsigned long addr) -{ - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); - - /* - * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its - * paravirt equivalent. Even with PCID, this is sufficient: we only - * use PCID if we also use global PTEs for the kernel mapping, and - * INVLPG flushes global translations across all address spaces. - * - * If PTI is on, then the kernel is mapped with non-global PTEs, and - * __flush_tlb_one_user() will flush the given address for the current - * kernel address space and for its usermode counterpart, but it does - * not flush it for other address spaces. - */ - __flush_tlb_one_user(addr); - - if (!static_cpu_has(X86_FEATURE_PTI)) - return; - - /* - * See above. We need to propagate the flush to all other address - * spaces. In principle, we only need to propagate it to kernelmode - * address spaces, but the extra bookkeeping we would need is not - * worth it. 
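
The CR4.PGE dance above works because of an architectural rule: any write to
CR4 that changes the PGE bit invalidates the entire TLB, global entries
included. A minimal sketch of how the consolidated helper defined at the end
of this header is meant to be called (the caller name is illustrative, and
interrupts must already be off so the cr4 shadow cannot change underneath):

	static void flush_everything_sketch(void)
	{
		unsigned long cr4 = this_cpu_read(cpu_tlbstate.cr4);

		/* Two CR4 writes, each flipping PGE: full TLB flush. */
		__native_tlb_flush_global(cr4);
	}
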
- */ - invalidate_other_asid(); -} - -#define TLB_FLUSH_ALL -1UL - -/* * TLB flushing: * * - flush_tlb_all() flushes all processes TLBs @@ -515,7 +197,7 @@ static inline void __flush_tlb_one_kernel(unsigned long addr) * - flush_tlb_page(vma, vmaddr) flushes one page * - flush_tlb_range(vma, start, end) flushes a range of pages * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages - * - flush_tlb_others(cpumask, info) flushes TLBs on other cpus + * - flush_tlb_multi(cpumask, info) flushes TLBs on multiple cpus * * ..but the i386 has somewhat limited tlb flushing capabilities, * and page-granular flushes are available only on i486 and up. @@ -541,11 +223,86 @@ struct flush_tlb_info { unsigned long start; unsigned long end; u64 new_tlb_gen; - unsigned int stride_shift; - bool freed_tables; + unsigned int initiating_cpu; + u8 stride_shift; + u8 freed_tables; + u8 trim_cpumask; }; -#define local_flush_tlb() __flush_tlb() +void flush_tlb_local(void); +void flush_tlb_one_user(unsigned long addr); +void flush_tlb_one_kernel(unsigned long addr); +void flush_tlb_multi(const struct cpumask *cpumask, + const struct flush_tlb_info *info); + +static inline bool is_dyn_asid(u16 asid) +{ + return asid < TLB_NR_DYN_ASIDS; +} + +static inline bool is_global_asid(u16 asid) +{ + return !is_dyn_asid(asid); +} + +#ifdef CONFIG_BROADCAST_TLB_FLUSH +static inline u16 mm_global_asid(struct mm_struct *mm) +{ + u16 asid; + + if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) + return 0; + + asid = smp_load_acquire(&mm->context.global_asid); + + /* mm->context.global_asid is either 0, or a global ASID */ + VM_WARN_ON_ONCE(asid && is_dyn_asid(asid)); + + return asid; +} + +static inline void mm_init_global_asid(struct mm_struct *mm) +{ + if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { + mm->context.global_asid = 0; + mm->context.asid_transition = false; + } +} + +static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) +{ + /* + * Notably flush_tlb_mm_range() -> broadcast_tlb_flush() -> + * finish_asid_transition() needs to observe asid_transition = true + * once it observes global_asid. + */ + mm->context.asid_transition = true; + smp_store_release(&mm->context.global_asid, asid); +} + +static inline void mm_clear_asid_transition(struct mm_struct *mm) +{ + WRITE_ONCE(mm->context.asid_transition, false); +} + +static inline bool mm_in_asid_transition(struct mm_struct *mm) +{ + if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) + return false; + + return mm && READ_ONCE(mm->context.asid_transition); +} +#else +static inline u16 mm_global_asid(struct mm_struct *mm) { return 0; } +static inline void mm_init_global_asid(struct mm_struct *mm) { } +static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) { } +static inline void mm_clear_asid_transition(struct mm_struct *mm) { } +static inline bool mm_in_asid_transition(struct mm_struct *mm) { return false; } +#endif /* CONFIG_BROADCAST_TLB_FLUSH */ + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#endif #define flush_tlb_mm(mm) \ flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true) @@ -554,7 +311,7 @@ struct flush_tlb_info { flush_tlb_mm_range((vma)->vm_mm, start, end, \ ((vma)->vm_flags & VM_HUGETLB) \ ? 
huge_page_shift(hstate_vma(vma)) \
-			: PAGE_SHIFT, false)
+			: PAGE_SHIFT, true)
 
 extern void flush_tlb_all(void);
 extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
@@ -567,8 +324,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
 	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false);
 }
 
-void native_flush_tlb_others(const struct cpumask *cpumask,
-			     const struct flush_tlb_info *info);
+static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
+{
+	bool should_defer = false;
+
+	/* If remote CPUs need to be flushed then defer the flush as a batch */
+	if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
+		should_defer = true;
+	put_cpu();
+
+	return should_defer;
+}
 
 static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
 {
@@ -581,21 +347,145 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
 	return atomic64_inc_return(&mm->context.tlb_gen);
 }
 
-static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
-					struct mm_struct *mm)
+static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+		struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	inc_mm_tlb_gen(mm);
 	cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+	batch->unmapped_pages = true;
+	mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
 }
 
 extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
 
-#ifndef CONFIG_PARAVIRT
-#define flush_tlb_others(mask, info)	\
-	native_flush_tlb_others(mask, info)
+static inline bool pte_flags_need_flush(unsigned long oldflags,
+					unsigned long newflags,
+					bool ignore_access)
+{
+	/*
+	 * Flags that require a flush when cleared but not when they are set.
+	 * Only include flags that would not trigger spurious page-faults.
+	 * Non-present entries are not cached. Hardware would set the
+	 * dirty/access bit if needed without a fault.
+	 */
+	const pteval_t flush_on_clear = _PAGE_DIRTY | _PAGE_PRESENT |
+					_PAGE_ACCESSED;
+	const pteval_t software_flags = _PAGE_SOFTW1 | _PAGE_SOFTW2 |
+					_PAGE_SOFTW3 | _PAGE_SOFTW4 |
+					_PAGE_SAVED_DIRTY;
+	const pteval_t flush_on_change = _PAGE_RW | _PAGE_USER | _PAGE_PWT |
+			  _PAGE_PCD | _PAGE_PSE | _PAGE_GLOBAL | _PAGE_PAT |
+			  _PAGE_PAT_LARGE | _PAGE_PKEY_BIT0 | _PAGE_PKEY_BIT1 |
+			  _PAGE_PKEY_BIT2 | _PAGE_PKEY_BIT3 | _PAGE_NX;
+	unsigned long diff = oldflags ^ newflags;
+
+	BUILD_BUG_ON(flush_on_clear & software_flags);
+	BUILD_BUG_ON(flush_on_clear & flush_on_change);
+	BUILD_BUG_ON(flush_on_change & software_flags);
+
+	/* Ignore software flags */
+	diff &= ~software_flags;
+
+	if (ignore_access)
+		diff &= ~_PAGE_ACCESSED;
+
+	/*
+	 * Were any of the 'flush_on_clear' flags cleared going from
+	 * 'oldflags' to 'newflags'?
+	 */
+	if (diff & oldflags & flush_on_clear)
+		return true;
+
+	/* Flush on modified flags. */
+	if (diff & flush_on_change)
+		return true;
+
+	/* Ensure there are no flags that were left behind */
+	if (IS_ENABLED(CONFIG_DEBUG_VM) &&
+	    (diff & ~(flush_on_clear | software_flags | flush_on_change))) {
+		VM_WARN_ON_ONCE(1);
+		return true;
+	}
-#define paravirt_tlb_remove_table(tlb, page) \
-	tlb_remove_page(tlb, (void *)(page))
+	return false;
+}
+
+/*
+ * pte_needs_flush() checks whether permissions were demoted and require a
+ * flush. It should only be used for userspace PTEs.
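+ *
+ * For example, given the tables in pte_flags_need_flush(): clearing
+ * _PAGE_RW (write-protecting a present PTE) flips a 'flush_on_change'
+ * bit and therefore needs a flush, while setting _PAGE_ACCESSED changes
+ * no flush-relevant bit once ignore_access strips it, so no flush is
+ * needed.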
+ */ +static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte) +{ + /* !PRESENT -> * ; no need for flush */ + if (!(pte_flags(oldpte) & _PAGE_PRESENT)) + return false; + + /* PFN changed ; needs flush */ + if (pte_pfn(oldpte) != pte_pfn(newpte)) + return true; + + /* + * check PTE flags; ignore access-bit; see comment in + * ptep_clear_flush_young(). + */ + return pte_flags_need_flush(pte_flags(oldpte), pte_flags(newpte), + true); +} +#define pte_needs_flush pte_needs_flush + +/* + * huge_pmd_needs_flush() checks whether permissions were demoted and require a + * flush. It should only be used for userspace huge PMDs. + */ +static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd) +{ + /* !PRESENT -> * ; no need for flush */ + if (!(pmd_flags(oldpmd) & _PAGE_PRESENT)) + return false; + + /* PFN changed ; needs flush */ + if (pmd_pfn(oldpmd) != pmd_pfn(newpmd)) + return true; + + /* + * check PMD flags; do not ignore access-bit; see + * pmdp_clear_flush_young(). + */ + return pte_flags_need_flush(pmd_flags(oldpmd), pmd_flags(newpmd), + false); +} +#define huge_pmd_needs_flush huge_pmd_needs_flush + +#ifdef CONFIG_ADDRESS_MASKING +static inline u64 tlbstate_lam_cr3_mask(void) +{ + u64 lam = this_cpu_read(cpu_tlbstate.lam); + + return lam << X86_CR3_LAM_U57_BIT; +} + +static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask) +{ + this_cpu_write(cpu_tlbstate.lam, lam >> X86_CR3_LAM_U57_BIT); + this_cpu_write(tlbstate_untag_mask, untag_mask); +} + +#else + +static inline u64 tlbstate_lam_cr3_mask(void) +{ + return 0; +} + +static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask) +{ +} #endif +#endif /* !MODULE */ +static inline void __native_tlb_flush_global(unsigned long cr4) +{ + native_write_cr4(cr4 ^ X86_CR4_PGE); + native_write_cr4(cr4); +} #endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 453cf38a1c33..1fadf0cf520c 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -31,9 +31,9 @@ * CONFIG_NUMA. */ #include <linux/numa.h> +#include <linux/cpumask.h> #ifdef CONFIG_NUMA -#include <linux/cpumask.h> #include <asm/mpspec.h> #include <asm/percpu.h> @@ -102,18 +102,105 @@ static inline void setup_node_to_cpumask_map(void) { } #include <asm-generic/topology.h> +/* Topology information */ +enum x86_topology_domains { + TOPO_SMT_DOMAIN, + TOPO_CORE_DOMAIN, + TOPO_MODULE_DOMAIN, + TOPO_TILE_DOMAIN, + TOPO_DIE_DOMAIN, + TOPO_DIEGRP_DOMAIN, + TOPO_PKG_DOMAIN, + TOPO_MAX_DOMAIN, +}; + +enum x86_topology_cpu_type { + TOPO_CPU_TYPE_PERFORMANCE, + TOPO_CPU_TYPE_EFFICIENCY, + TOPO_CPU_TYPE_UNKNOWN, +}; + +struct x86_topology_system { + unsigned int dom_shifts[TOPO_MAX_DOMAIN]; + unsigned int dom_size[TOPO_MAX_DOMAIN]; +}; + +extern struct x86_topology_system x86_topo_system; + +static inline unsigned int topology_get_domain_size(enum x86_topology_domains dom) +{ + return x86_topo_system.dom_size[dom]; +} + +static inline unsigned int topology_get_domain_shift(enum x86_topology_domains dom) +{ + return dom == TOPO_SMT_DOMAIN ? 
0 : x86_topo_system.dom_shifts[dom - 1]; +} + extern const struct cpumask *cpu_coregroup_mask(int cpu); +extern const struct cpumask *cpu_clustergroup_mask(int cpu); + +#define topology_logical_package_id(cpu) (cpu_data(cpu).topo.logical_pkg_id) +#define topology_physical_package_id(cpu) (cpu_data(cpu).topo.pkg_id) +#define topology_logical_die_id(cpu) (cpu_data(cpu).topo.logical_die_id) +#define topology_logical_core_id(cpu) (cpu_data(cpu).topo.logical_core_id) +#define topology_die_id(cpu) (cpu_data(cpu).topo.die_id) +#define topology_core_id(cpu) (cpu_data(cpu).topo.core_id) +#define topology_ppin(cpu) (cpu_data(cpu).ppin) -#define topology_logical_package_id(cpu) (cpu_data(cpu).logical_proc_id) -#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) -#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) +#define topology_amd_node_id(cpu) (cpu_data(cpu).topo.amd_node_id) + +extern unsigned int __max_dies_per_package; +extern unsigned int __max_logical_packages; +extern unsigned int __max_threads_per_core; +extern unsigned int __num_threads_per_package; +extern unsigned int __num_cores_per_package; + +const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c); +enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c); + +static inline unsigned int topology_max_packages(void) +{ + return __max_logical_packages; +} + +static inline unsigned int topology_max_dies_per_package(void) +{ + return __max_dies_per_package; +} + +static inline unsigned int topology_num_cores_per_package(void) +{ + return __num_cores_per_package; +} + +static inline unsigned int topology_num_threads_per_package(void) +{ + return __num_threads_per_package; +} + +#ifdef CONFIG_X86_LOCAL_APIC +int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level); +#else +static inline int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level) +{ + return 0; +} +#endif #ifdef CONFIG_SMP +#define topology_cluster_id(cpu) (cpu_data(cpu).topo.l2c_id) +#define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu)) +#define topology_cluster_cpumask(cpu) (cpu_clustergroup_mask(cpu)) #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) -extern unsigned int __max_logical_packages; -#define topology_max_packages() (__max_logical_packages) + +static inline int topology_phys_to_logical_pkg(unsigned int pkg) +{ + return topology_get_logical_id(pkg << x86_topo_system.dom_shifts[TOPO_PKG_DOMAIN], + TOPO_PKG_DOMAIN); +} extern int __max_smt_threads; @@ -122,19 +209,43 @@ static inline int topology_max_smt_threads(void) return __max_smt_threads; } -int topology_update_package_map(unsigned int apicid, unsigned int cpu); -int topology_phys_to_logical_pkg(unsigned int pkg); -bool topology_is_primary_thread(unsigned int cpu); -bool topology_smt_supported(void); -#else -#define topology_max_packages() (1) -static inline int -topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } +#include <linux/cpu_smt.h> + +extern unsigned int __amd_nodes_per_pkg; + +static inline unsigned int topology_amd_nodes_per_pkg(void) +{ + return __amd_nodes_per_pkg; +} + +#else /* CONFIG_SMP */ static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } static inline int topology_max_smt_threads(void) { return 1; } -static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } -static inline bool topology_smt_supported(void) { return false; } -#endif +static 
inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; } +#endif /* !CONFIG_SMP */ + +extern struct cpumask __cpu_primary_thread_mask; +#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) + +/** + * topology_is_primary_thread - Check whether CPU is the primary SMT thread + * @cpu: CPU to check + */ +static inline bool topology_is_primary_thread(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_primary_thread_mask); +} +#define topology_is_primary_thread topology_is_primary_thread + +int topology_get_primary_thread(unsigned int cpu); + +static inline bool topology_is_core_online(unsigned int cpu) +{ + int pcpu = topology_get_primary_thread(cpu); + + return pcpu >= 0 ? cpu_online(pcpu) : false; +} +#define topology_is_core_online topology_is_core_online static inline void arch_fix_phys_package_id(int num, u32 slot) { @@ -150,7 +261,7 @@ extern bool x86_topology_update; #include <asm/percpu.h> DECLARE_PER_CPU_READ_MOSTLY(int, sched_core_priority); -extern unsigned int __read_mostly sysctl_sched_itmt_enabled; +extern bool __read_mostly sysctl_sched_itmt_enabled; /* Interface to set priority of a cpu */ void sched_set_itmt_core_prio(int prio, int core_cpu); @@ -163,7 +274,7 @@ void sched_clear_itmt_support(void); #else /* CONFIG_SCHED_MC_PRIO */ -#define sysctl_sched_itmt_enabled 0 +#define sysctl_sched_itmt_enabled false static inline void sched_set_itmt_core_prio(int prio, int core_cpu) { } @@ -176,4 +287,44 @@ static inline void sched_clear_itmt_support(void) } #endif /* CONFIG_SCHED_MC_PRIO */ +#if defined(CONFIG_SMP) && defined(CONFIG_X86_64) +#include <asm/cpufeature.h> + +DECLARE_STATIC_KEY_FALSE(arch_scale_freq_key); + +#define arch_scale_freq_invariant() static_branch_likely(&arch_scale_freq_key) + +DECLARE_PER_CPU(unsigned long, arch_freq_scale); + +static inline long arch_scale_freq_capacity(int cpu) +{ + return per_cpu(arch_freq_scale, cpu); +} +#define arch_scale_freq_capacity arch_scale_freq_capacity + +bool arch_enable_hybrid_capacity_scale(void); +void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap, + unsigned long cap_freq, unsigned long base_freq); + +unsigned long arch_scale_cpu_capacity(int cpu); +#define arch_scale_cpu_capacity arch_scale_cpu_capacity + +extern void arch_set_max_freq_ratio(bool turbo_disabled); +extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled); +#else +static inline bool arch_enable_hybrid_capacity_scale(void) { return false; } +static inline void arch_set_cpu_capacity(int cpu, unsigned long cap, + unsigned long max_cap, + unsigned long cap_freq, + unsigned long base_freq) { } + +static inline void arch_set_max_freq_ratio(bool turbo_disabled) { } +static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { } +#endif + +extern void arch_scale_freq_tick(void); +#define arch_scale_freq_tick arch_scale_freq_tick + +extern int arch_sched_node_distance(int from, int to); + #endif /* _ASM_X86_TOPOLOGY_H */ diff --git a/arch/x86/include/asm/trace/common.h b/arch/x86/include/asm/trace/common.h deleted file mode 100644 index 57c8da027d99..000000000000 --- a/arch/x86/include/asm/trace/common.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _ASM_TRACE_COMMON_H -#define _ASM_TRACE_COMMON_H - -#ifdef CONFIG_TRACING -DECLARE_STATIC_KEY_FALSE(trace_pagefault_key); -#define trace_pagefault_enabled() \ - static_branch_unlikely(&trace_pagefault_key) -DECLARE_STATIC_KEY_FALSE(trace_resched_ipi_key); -#define trace_resched_ipi_enabled() \ - 
static_branch_unlikely(&trace_resched_ipi_key) -#else -static inline bool trace_pagefault_enabled(void) { return false; } -static inline bool trace_resched_ipi_enabled(void) { return false; } -#endif - -#endif diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h deleted file mode 100644 index e0e6d7f21399..000000000000 --- a/arch/x86/include/asm/trace/exceptions.h +++ /dev/null @@ -1,54 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM exceptions - -#if !defined(_TRACE_PAGE_FAULT_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_PAGE_FAULT_H - -#include <linux/tracepoint.h> -#include <asm/trace/common.h> - -extern int trace_pagefault_reg(void); -extern void trace_pagefault_unreg(void); - -DECLARE_EVENT_CLASS(x86_exceptions, - - TP_PROTO(unsigned long address, struct pt_regs *regs, - unsigned long error_code), - - TP_ARGS(address, regs, error_code), - - TP_STRUCT__entry( - __field( unsigned long, address ) - __field( unsigned long, ip ) - __field( unsigned long, error_code ) - ), - - TP_fast_assign( - __entry->address = address; - __entry->ip = regs->ip; - __entry->error_code = error_code; - ), - - TP_printk("address=%pf ip=%pf error_code=0x%lx", - (void *)__entry->address, (void *)__entry->ip, - __entry->error_code) ); - -#define DEFINE_PAGE_FAULT_EVENT(name) \ -DEFINE_EVENT_FN(x86_exceptions, name, \ - TP_PROTO(unsigned long address, struct pt_regs *regs, \ - unsigned long error_code), \ - TP_ARGS(address, regs, error_code), \ - trace_pagefault_reg, trace_pagefault_unreg); - -DEFINE_PAGE_FAULT_EVENT(page_fault_user); -DEFINE_PAGE_FAULT_EVENT(page_fault_kernel); - -#undef TRACE_INCLUDE_PATH -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE exceptions -#endif /* _TRACE_PAGE_FAULT_H */ - -/* This part must be outside protection */ -#include <trace/define_trace.h> diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h index 069c04be1507..721b408d9a67 100644 --- a/arch/x86/include/asm/trace/fpu.h +++ b/arch/x86/include/asm/trace/fpu.h @@ -13,22 +13,22 @@ DECLARE_EVENT_CLASS(x86_fpu, TP_STRUCT__entry( __field(struct fpu *, fpu) - __field(bool, initialized) + __field(bool, load_fpu) __field(u64, xfeatures) __field(u64, xcomp_bv) ), TP_fast_assign( __entry->fpu = fpu; - __entry->initialized = fpu->initialized; + __entry->load_fpu = test_thread_flag(TIF_NEED_FPU_LOAD); if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { - __entry->xfeatures = fpu->state.xsave.header.xfeatures; - __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; + __entry->xfeatures = fpu->fpstate->regs.xsave.header.xfeatures; + __entry->xcomp_bv = fpu->fpstate->regs.xsave.header.xcomp_bv; } ), - TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx", + TP_printk("x86/fpu: %p load: %d xfeatures: %llx xcomp_bv: %llx", __entry->fpu, - __entry->initialized, + __entry->load_fpu, __entry->xfeatures, __entry->xcomp_bv ) @@ -44,16 +44,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_after_save, TP_ARGS(fpu) ); -DEFINE_EVENT(x86_fpu, x86_fpu_before_restore, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - -DEFINE_EVENT(x86_fpu, x86_fpu_after_restore, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - DEFINE_EVENT(x86_fpu, x86_fpu_regs_activated, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) @@ -64,26 +54,11 @@ DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated, TP_ARGS(fpu) ); -DEFINE_EVENT(x86_fpu, x86_fpu_activate_state, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - -DEFINE_EVENT(x86_fpu, 
x86_fpu_init_state, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - DEFINE_EVENT(x86_fpu, x86_fpu_dropped, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); -DEFINE_EVENT(x86_fpu, x86_fpu_copy_src, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - DEFINE_EVENT(x86_fpu, x86_fpu_copy_dst, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h index ace464f09681..a8e5a7a2b460 100644 --- a/arch/x86/include/asm/trace/hyperv.h +++ b/arch/x86/include/asm/trace/hyperv.h @@ -8,7 +8,7 @@ #if IS_ENABLED(CONFIG_HYPERV) -TRACE_EVENT(hyperv_mmu_flush_tlb_others, +TRACE_EVENT(hyperv_mmu_flush_tlb_multi, TP_PROTO(const struct cpumask *cpus, const struct flush_tlb_info *info), TP_ARGS(cpus, info), @@ -71,6 +71,21 @@ TRACE_EVENT(hyperv_send_ipi_mask, __entry->ncpus, __entry->vector) ); +TRACE_EVENT(hyperv_send_ipi_one, + TP_PROTO(int cpu, + int vector), + TP_ARGS(cpu, vector), + TP_STRUCT__entry( + __field(int, cpu) + __field(int, vector) + ), + TP_fast_assign(__entry->cpu = cpu; + __entry->vector = vector; + ), + TP_printk("cpu %d vector %x", + __entry->cpu, __entry->vector) + ); + #endif /* CONFIG_HYPERV */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 33b9d0f0aafe..7408bebdfde0 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -6,13 +6,9 @@ #define _TRACE_IRQ_VECTORS_H #include <linux/tracepoint.h> -#include <asm/trace/common.h> #ifdef CONFIG_X86_LOCAL_APIC -extern int trace_resched_ipi_reg(void); -extern void trace_resched_ipi_unreg(void); - DECLARE_EVENT_CLASS(x86_irq_vector, TP_PROTO(int vector), @@ -37,18 +33,6 @@ DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \ TP_PROTO(int vector), \ TP_ARGS(vector), NULL, NULL); -#define DEFINE_RESCHED_IPI_EVENT(name) \ -DEFINE_EVENT_FN(x86_irq_vector, name##_entry, \ - TP_PROTO(int vector), \ - TP_ARGS(vector), \ - trace_resched_ipi_reg, \ - trace_resched_ipi_unreg); \ -DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \ - TP_PROTO(int vector), \ - TP_ARGS(vector), \ - trace_resched_ipi_reg, \ - trace_resched_ipi_unreg); - /* * local_timer - called when entering/exiting a local timer interrupt * vector handler @@ -99,7 +83,7 @@ TRACE_EVENT_PERF_PERM(irq_work_exit, is_sampling_event(p_event) ? 
-EPERM : 0); /* * reschedule - called when entering/exiting a reschedule vector handler */ -DEFINE_RESCHED_IPI_EVENT(reschedule); +DEFINE_IRQ_VECTOR_EVENT(reschedule); /* * call_function - called when entering/exiting a call function interrupt diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h deleted file mode 100644 index 54133017267c..000000000000 --- a/arch/x86/include/asm/trace/mpx.h +++ /dev/null @@ -1,134 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM mpx - -#if !defined(_TRACE_MPX_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_MPX_H - -#include <linux/tracepoint.h> - -#ifdef CONFIG_X86_INTEL_MPX - -TRACE_EVENT(mpx_bounds_register_exception, - - TP_PROTO(void __user *addr_referenced, - const struct mpx_bndreg *bndreg), - TP_ARGS(addr_referenced, bndreg), - - TP_STRUCT__entry( - __field(void __user *, addr_referenced) - __field(u64, lower_bound) - __field(u64, upper_bound) - ), - - TP_fast_assign( - __entry->addr_referenced = addr_referenced; - __entry->lower_bound = bndreg->lower_bound; - __entry->upper_bound = bndreg->upper_bound; - ), - /* - * Note that we are printing out the '~' of the upper - * bounds register here. It is actually stored in its - * one's complement form so that its 'init' state - * corresponds to all 0's. But, that looks like - * gibberish when printed out, so print out the 1's - * complement instead of the actual value here. Note - * though that you still need to specify filters for the - * actual value, not the displayed one. - */ - TP_printk("address referenced: 0x%p bounds: lower: 0x%llx ~upper: 0x%llx", - __entry->addr_referenced, - __entry->lower_bound, - ~__entry->upper_bound - ) -); - -TRACE_EVENT(bounds_exception_mpx, - - TP_PROTO(const struct mpx_bndcsr *bndcsr), - TP_ARGS(bndcsr), - - TP_STRUCT__entry( - __field(u64, bndcfgu) - __field(u64, bndstatus) - ), - - TP_fast_assign( - /* need to get rid of the 'const' on bndcsr */ - __entry->bndcfgu = (u64)bndcsr->bndcfgu; - __entry->bndstatus = (u64)bndcsr->bndstatus; - ), - - TP_printk("bndcfgu:0x%llx bndstatus:0x%llx", - __entry->bndcfgu, - __entry->bndstatus) -); - -DECLARE_EVENT_CLASS(mpx_range_trace, - - TP_PROTO(unsigned long start, - unsigned long end), - TP_ARGS(start, end), - - TP_STRUCT__entry( - __field(unsigned long, start) - __field(unsigned long, end) - ), - - TP_fast_assign( - __entry->start = start; - __entry->end = end; - ), - - TP_printk("[0x%p:0x%p]", - (void *)__entry->start, - (void *)__entry->end - ) -); - -DEFINE_EVENT(mpx_range_trace, mpx_unmap_zap, - TP_PROTO(unsigned long start, unsigned long end), - TP_ARGS(start, end) -); - -DEFINE_EVENT(mpx_range_trace, mpx_unmap_search, - TP_PROTO(unsigned long start, unsigned long end), - TP_ARGS(start, end) -); - -TRACE_EVENT(mpx_new_bounds_table, - - TP_PROTO(unsigned long table_vaddr), - TP_ARGS(table_vaddr), - - TP_STRUCT__entry( - __field(unsigned long, table_vaddr) - ), - - TP_fast_assign( - __entry->table_vaddr = table_vaddr; - ), - - TP_printk("table vaddr:%p", (void *)__entry->table_vaddr) -); - -#else - -/* - * This gets used outside of MPX-specific code, so we need a stub. 
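
Each TRACE_EVENT()/DEFINE_EVENT() above expands to a trace_<name>() call
site for the kernel to fire. A sketch of how the two events touched in this
diff are invoked (the call sites shown are illustrative; the real ones live
in the Hyper-V APIC code and the reschedule IPI handler):

	/* hyperv_send_ipi_one: one target CPU, one vector */
	trace_hyperv_send_ipi_one(cpu, vector);

	/* from DEFINE_IRQ_VECTOR_EVENT(reschedule) */
	trace_reschedule_entry(RESCHEDULE_VECTOR);
	/* ... handle the IPI ... */
	trace_reschedule_exit(RESCHEDULE_VECTOR);
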
- */ -static inline -void trace_bounds_exception_mpx(const struct mpx_bndcsr *bndcsr) -{ -} - -#endif /* CONFIG_X86_INTEL_MPX */ - -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH asm/trace/ -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE mpx -#endif /* _TRACE_MPX_H */ - -/* This part must be outside protection */ -#include <trace/define_trace.h> diff --git a/arch/x86/include/asm/trap_pf.h b/arch/x86/include/asm/trap_pf.h new file mode 100644 index 000000000000..a23a7b707b64 --- /dev/null +++ b/arch/x86/include/asm/trap_pf.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_TRAP_PF_H +#define _ASM_X86_TRAP_PF_H + +#include <linux/bits.h> + +/* + * Page fault error code bits: + * + * bit 0 == 0: no page found 1: protection fault + * bit 1 == 0: read access 1: write access + * bit 2 == 0: kernel-mode access 1: user-mode access + * bit 3 == 1: use of reserved bit detected + * bit 4 == 1: fault was an instruction fetch + * bit 5 == 1: protection keys block access + * bit 6 == 1: shadow stack access fault + * bit 15 == 1: SGX MMU page-fault + * bit 31 == 1: fault was due to RMP violation + */ +enum x86_pf_error_code { + X86_PF_PROT = BIT(0), + X86_PF_WRITE = BIT(1), + X86_PF_USER = BIT(2), + X86_PF_RSVD = BIT(3), + X86_PF_INSTR = BIT(4), + X86_PF_PK = BIT(5), + X86_PF_SHSTK = BIT(6), + X86_PF_SGX = BIT(15), + X86_PF_RMP = BIT(31), +}; + +#endif /* _ASM_X86_TRAP_PF_H */ diff --git a/arch/x86/include/asm/trapnr.h b/arch/x86/include/asm/trapnr.h new file mode 100644 index 000000000000..8d1154cdf787 --- /dev/null +++ b/arch/x86/include/asm/trapnr.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_TRAPNR_H +#define _ASM_X86_TRAPNR_H + +/* + * Event type codes used by FRED, Intel VT-x and AMD SVM + */ +#define EVENT_TYPE_EXTINT 0 // External interrupt +#define EVENT_TYPE_RESERVED 1 +#define EVENT_TYPE_NMI 2 // NMI +#define EVENT_TYPE_HWEXC 3 // Hardware originated traps, exceptions +#define EVENT_TYPE_SWINT 4 // INT n +#define EVENT_TYPE_PRIV_SWEXC 5 // INT1 +#define EVENT_TYPE_SWEXC 6 // INTO, INT3 +#define EVENT_TYPE_OTHER 7 // FRED SYSCALL/SYSENTER, VT-x MTF + +/* Interrupts/Exceptions */ + +#define X86_TRAP_DE 0 /* Divide-by-zero */ +#define X86_TRAP_DB 1 /* Debug */ +#define X86_TRAP_NMI 2 /* Non-maskable Interrupt */ +#define X86_TRAP_BP 3 /* Breakpoint */ +#define X86_TRAP_OF 4 /* Overflow */ +#define X86_TRAP_BR 5 /* Bound Range Exceeded */ +#define X86_TRAP_UD 6 /* Invalid Opcode */ +#define X86_TRAP_NM 7 /* Device Not Available */ +#define X86_TRAP_DF 8 /* Double Fault */ +#define X86_TRAP_OLD_MF 9 /* Coprocessor Segment Overrun */ +#define X86_TRAP_TS 10 /* Invalid TSS */ +#define X86_TRAP_NP 11 /* Segment Not Present */ +#define X86_TRAP_SS 12 /* Stack Segment Fault */ +#define X86_TRAP_GP 13 /* General Protection Fault */ +#define X86_TRAP_PF 14 /* Page Fault */ +#define X86_TRAP_SPURIOUS 15 /* Spurious Interrupt */ +#define X86_TRAP_MF 16 /* x87 Floating-Point Exception */ +#define X86_TRAP_AC 17 /* Alignment Check */ +#define X86_TRAP_MC 18 /* Machine Check */ +#define X86_TRAP_XF 19 /* SIMD Floating-Point Exception */ +#define X86_TRAP_VE 20 /* Virtualization Exception */ +#define X86_TRAP_CP 21 /* Control Protection Exception */ +#define X86_TRAP_VC 29 /* VMM Communication Exception */ +#define X86_TRAP_IRET 32 /* IRET Exception */ + +#endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 7d6f3f3fad78..869b88061801 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h 
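
The X86_PF_* bits above combine to describe one fault. A worked decode as a
small sketch (the helper name is made up): error code 0x7 is
X86_PF_PROT | X86_PF_WRITE | X86_PF_USER, that is, a user-mode write that
hit a present page whose protections forbid it, the classic COW signature.

	static bool is_user_write_to_present_page(unsigned long error_code)
	{
		const unsigned long cow = X86_PF_PROT | X86_PF_WRITE | X86_PF_USER;

		return (error_code & cow) == cow;
	}
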
@@ -6,93 +6,24 @@ #include <linux/kprobes.h> #include <asm/debugreg.h> +#include <asm/idtentry.h> #include <asm/siginfo.h> /* TRAP_TRACE, ... */ +#include <asm/trap_pf.h> -#define dotraplinkage __visible - -asmlinkage void divide_error(void); -asmlinkage void debug(void); -asmlinkage void nmi(void); -asmlinkage void int3(void); -asmlinkage void overflow(void); -asmlinkage void bounds(void); -asmlinkage void invalid_op(void); -asmlinkage void device_not_available(void); #ifdef CONFIG_X86_64 -asmlinkage void double_fault(void); -#endif -asmlinkage void coprocessor_segment_overrun(void); -asmlinkage void invalid_TSS(void); -asmlinkage void segment_not_present(void); -asmlinkage void stack_segment(void); -asmlinkage void general_protection(void); -asmlinkage void page_fault(void); -asmlinkage void async_page_fault(void); -asmlinkage void spurious_interrupt_bug(void); -asmlinkage void coprocessor_error(void); -asmlinkage void alignment_check(void); -#ifdef CONFIG_X86_MCE -asmlinkage void machine_check(void); -#endif /* CONFIG_X86_MCE */ -asmlinkage void simd_coprocessor_error(void); - -#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) -asmlinkage void xen_divide_error(void); -asmlinkage void xen_xennmi(void); -asmlinkage void xen_xendebug(void); -asmlinkage void xen_xenint3(void); -asmlinkage void xen_overflow(void); -asmlinkage void xen_bounds(void); -asmlinkage void xen_invalid_op(void); -asmlinkage void xen_device_not_available(void); -asmlinkage void xen_double_fault(void); -asmlinkage void xen_coprocessor_segment_overrun(void); -asmlinkage void xen_invalid_TSS(void); -asmlinkage void xen_segment_not_present(void); -asmlinkage void xen_stack_segment(void); -asmlinkage void xen_general_protection(void); -asmlinkage void xen_page_fault(void); -asmlinkage void xen_spurious_interrupt_bug(void); -asmlinkage void xen_coprocessor_error(void); -asmlinkage void xen_alignment_check(void); -#ifdef CONFIG_X86_MCE -asmlinkage void xen_machine_check(void); -#endif /* CONFIG_X86_MCE */ -asmlinkage void xen_simd_coprocessor_error(void); -#endif - -dotraplinkage void do_divide_error(struct pt_regs *regs, long error_code); -dotraplinkage void do_debug(struct pt_regs *regs, long error_code); -dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); -dotraplinkage void do_int3(struct pt_regs *regs, long error_code); -dotraplinkage void do_overflow(struct pt_regs *regs, long error_code); -dotraplinkage void do_bounds(struct pt_regs *regs, long error_code); -dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code); -dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code); -dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code); -dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code); -dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code); -dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code); -#ifdef CONFIG_X86_64 -dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code); asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); asmlinkage __visible notrace -struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s); -void __init trap_init(void); -#endif -dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code); -dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code); -dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code); -dotraplinkage 
void do_coprocessor_error(struct pt_regs *regs, long error_code); -dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code); -#ifdef CONFIG_X86_MCE -dotraplinkage void do_machine_check(struct pt_regs *regs, long error_code); +struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs); +asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs); #endif -dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code); -#ifdef CONFIG_X86_32 -dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code); + +extern int ibt_selftest(void); +extern int ibt_selftest_noendbr(void); + +#ifdef CONFIG_X86_F00F_BUG +/* For handling the FOOF bug */ +void handle_invalid_op(struct pt_regs *regs); #endif -dotraplinkage void do_mce(struct pt_regs *regs, long error_code); static inline int get_si_code(unsigned long condition) { @@ -104,72 +35,26 @@ static inline int get_si_code(unsigned long condition) return TRAP_BRKPT; } -extern int panic_on_unrecovered_nmi; - void math_emulate(struct math_emu_info *); -#ifndef CONFIG_X86_32 -asmlinkage void smp_thermal_interrupt(struct pt_regs *regs); -asmlinkage void smp_threshold_interrupt(struct pt_regs *regs); -asmlinkage void smp_deferred_error_interrupt(struct pt_regs *regs); -#endif -void smp_apic_timer_interrupt(struct pt_regs *regs); -void smp_spurious_interrupt(struct pt_regs *regs); -void smp_error_interrupt(struct pt_regs *regs); -asmlinkage void smp_irq_move_cleanup_interrupt(void); - -extern void ist_enter(struct pt_regs *regs); -extern void ist_exit(struct pt_regs *regs); -extern void ist_begin_non_atomic(struct pt_regs *regs); -extern void ist_end_non_atomic(void); +bool fault_in_kernel_space(unsigned long address); #ifdef CONFIG_VMAP_STACK -void __noreturn handle_stack_overflow(const char *message, - struct pt_regs *regs, - unsigned long fault_address); +void __noreturn handle_stack_overflow(struct pt_regs *regs, + unsigned long fault_address, + struct stack_info *info); #endif -/* Interrupts/Exceptions */ -enum { - X86_TRAP_DE = 0, /* 0, Divide-by-zero */ - X86_TRAP_DB, /* 1, Debug */ - X86_TRAP_NMI, /* 2, Non-maskable Interrupt */ - X86_TRAP_BP, /* 3, Breakpoint */ - X86_TRAP_OF, /* 4, Overflow */ - X86_TRAP_BR, /* 5, Bound Range Exceeded */ - X86_TRAP_UD, /* 6, Invalid Opcode */ - X86_TRAP_NM, /* 7, Device Not Available */ - X86_TRAP_DF, /* 8, Double Fault */ - X86_TRAP_OLD_MF, /* 9, Coprocessor Segment Overrun */ - X86_TRAP_TS, /* 10, Invalid TSS */ - X86_TRAP_NP, /* 11, Segment Not Present */ - X86_TRAP_SS, /* 12, Stack Segment Fault */ - X86_TRAP_GP, /* 13, General Protection Fault */ - X86_TRAP_PF, /* 14, Page Fault */ - X86_TRAP_SPURIOUS, /* 15, Spurious Interrupt */ - X86_TRAP_MF, /* 16, x87 Floating-Point Exception */ - X86_TRAP_AC, /* 17, Alignment Check */ - X86_TRAP_MC, /* 18, Machine Check */ - X86_TRAP_XF, /* 19, SIMD Floating-Point Exception */ - X86_TRAP_IRET = 32, /* 32, IRET Exception */ -}; +static inline void cond_local_irq_enable(struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) + local_irq_enable(); +} + +static inline void cond_local_irq_disable(struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) + local_irq_disable(); +} -/* - * Page fault error code bits: - * - * bit 0 == 0: no page found 1: protection fault - * bit 1 == 0: read access 1: write access - * bit 2 == 0: kernel-mode access 1: user-mode access - * bit 3 == 1: use of reserved bit detected - * bit 4 == 1: fault was an instruction fetch - * bit 5 == 1: protection keys block 
access - */ -enum x86_pf_error_code { - X86_PF_PROT = 1 << 0, - X86_PF_WRITE = 1 << 1, - X86_PF_USER = 1 << 2, - X86_PF_RSVD = 1 << 3, - X86_PF_INSTR = 1 << 4, - X86_PF_PK = 1 << 5, -}; #endif /* _ASM_X86_TRAPS_H */ diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 8a0c25c6bf09..4f7f09f50552 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -5,10 +5,64 @@ #ifndef _ASM_X86_TSC_H #define _ASM_X86_TSC_H +#include <asm/asm.h> +#include <asm/cpufeature.h> #include <asm/processor.h> +#include <asm/msr.h> -#define NS_SCALE 10 /* 2^10, carefully chosen */ -#define US_SCALE 32 /* 2^32, arbitralrily chosen */ +/** + * rdtsc() - returns the current TSC without ordering constraints + * + * rdtsc() returns the result of RDTSC as a 64-bit integer. The + * only ordering constraint it supplies is the ordering implied by + * "asm volatile": it will put the RDTSC in the place you expect. The + * CPU can and will speculatively execute that RDTSC, though, so the + * results can be non-monotonic if compared on different CPUs. + */ +static __always_inline u64 rdtsc(void) +{ + EAX_EDX_DECLARE_ARGS(val, low, high); + + asm volatile("rdtsc" : EAX_EDX_RET(val, low, high)); + + return EAX_EDX_VAL(val, low, high); +} + +/** + * rdtsc_ordered() - read the current TSC in program order + * + * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer. + * It is ordered like a load to a global in-memory counter. It should + * be impossible to observe non-monotonic rdtsc_unordered() behavior + * across multiple CPUs as long as the TSC is synced. + */ +static __always_inline u64 rdtsc_ordered(void) +{ + EAX_EDX_DECLARE_ARGS(val, low, high); + + /* + * The RDTSC instruction is not ordered relative to memory + * access. The Intel SDM and the AMD APM are both vague on this + * point, but empirically an RDTSC instruction can be + * speculatively executed before prior loads. An RDTSC + * immediately after an appropriate barrier appears to be + * ordered as a normal load, that is, it provides the same + * ordering guarantees as reading from a global memory location + * that some other imaginary CPU is updating continuously with a + * time stamp. + * + * Thus, use the preferred barrier on the respective CPU, aiming for + * RDTSCP as the default. + */ + asm volatile(ALTERNATIVE_2("rdtsc", + "lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC, + "rdtscp", X86_FEATURE_RDTSCP) + : EAX_EDX_RET(val, low, high) + /* RDTSCP clobbers ECX with MSR_TSC_AUX. */ + :: "ecx"); + + return EAX_EDX_VAL(val, low, high); +} /* * Standard way to access the cycle counter. 
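
A short sketch of how the two readers above divide the work (the function
name is illustrative): rdtsc() for cheap, unordered samples; rdtsc_ordered()
when the reads must not drift across the region being measured:

	static u64 cycles_for(void (*fn)(void))
	{
		u64 t0, t1;

		t0 = rdtsc_ordered();	/* ordered like a load: no hoisting */
		fn();
		t1 = rdtsc_ordered();	/* cannot be speculated before fn() */

		return t1 - t0;
	}
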
@@ -22,20 +76,15 @@ extern void disable_TSC(void); static inline cycles_t get_cycles(void) { -#ifndef CONFIG_X86_TSC - if (!boot_cpu_has(X86_FEATURE_TSC)) + if (!IS_ENABLED(CONFIG_X86_TSC) && + !cpu_feature_enabled(X86_FEATURE_TSC)) return 0; -#endif - return rdtsc(); } - -extern struct system_counterval_t convert_art_to_tsc(u64 art); -extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); +#define get_cycles get_cycles extern void tsc_early_init(void); extern void tsc_init(void); -extern unsigned long calibrate_delay_is_known(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); @@ -58,12 +107,10 @@ extern bool tsc_async_resets; #ifdef CONFIG_X86_TSC extern bool tsc_store_and_check_tsc_adjust(bool bootcpu); extern void tsc_verify_tsc_adjust(bool resume); -extern void check_tsc_sync_source(int cpu); extern void check_tsc_sync_target(void); #else static inline bool tsc_store_and_check_tsc_adjust(bool bootcpu) { return false; } static inline void tsc_verify_tsc_adjust(bool resume) { } -static inline void check_tsc_sync_source(int cpu) { } static inline void check_tsc_sync_target(void) { } #endif diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 780f2b42c8ef..367297b188c3 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -5,118 +5,33 @@ * User space memory access functions */ #include <linux/compiler.h> +#include <linux/instrumented.h> #include <linux/kasan-checks.h> +#include <linux/mm_types.h> #include <linux/string.h> +#include <linux/mmap_lock.h> #include <asm/asm.h> #include <asm/page.h> #include <asm/smap.h> #include <asm/extable.h> +#include <asm/tlbflush.h> -/* - * The fs value determines whether argument validity checking should be - * performed or not. If get_fs() == USER_DS, checking is performed, with - * get_fs() == KERNEL_DS, checking is bypassed. - * - * For historical reasons, these macros are grossly misnamed. - */ - -#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) - -#define KERNEL_DS MAKE_MM_SEG(-1UL) -#define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX) - -#define get_ds() (KERNEL_DS) -#define get_fs() (current->thread.addr_limit) -static inline void set_fs(mm_segment_t fs) -{ - current->thread.addr_limit = fs; - /* On user-mode return, check fs is correct */ - set_thread_flag(TIF_FSCHECK); -} - -#define segment_eq(a, b) ((a).seg == (b).seg) - -#define user_addr_max() (current->thread.addr_limit.seg) -#define __addr_ok(addr) \ - ((unsigned long __force)(addr) < user_addr_max()) - -/* - * Test whether a block of memory is a valid user space address. - * Returns 0 if the range is valid, nonzero otherwise. - */ -static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, unsigned long limit) -{ - /* - * If we have used "sizeof()" for the size, - * we know it won't overflow the limit (but - * it might overflow the 'addr', so it's - * important to subtract the size from the - * limit, not add it to the address). - */ - if (__builtin_constant_p(size)) - return unlikely(addr > limit - size); - - /* Arbitrary sizes? 
Be careful about overflow */ - addr += size; - if (unlikely(addr < size)) - return true; - return unlikely(addr > limit); -} - -#define __range_not_ok(addr, size, limit) \ -({ \ - __chk_user_ptr(addr); \ - __chk_range_not_ok((unsigned long __force)(addr), size, limit); \ -}) - -#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -# define WARN_ON_IN_IRQ() WARN_ON_ONCE(!in_task()) +#ifdef CONFIG_X86_32 +# include <asm/uaccess_32.h> #else -# define WARN_ON_IN_IRQ() +# include <asm/uaccess_64.h> #endif -/** - * access_ok: - Checks if a user space pointer is valid - * @addr: User space pointer to start of block to check - * @size: Size of block to check - * - * Context: User context only. This function may sleep if pagefaults are - * enabled. - * - * Checks if a pointer to a block of memory in user space is valid. - * - * Returns true (nonzero) if the memory block may be valid, false (zero) - * if it is definitely invalid. - * - * Note that, depending on architecture, this function probably just - * checks that the pointer is in the user space range - after calling - * this function, memory access functions may still return -EFAULT. - */ -#define access_ok(addr, size) \ -({ \ - WARN_ON_IN_IRQ(); \ - likely(!__range_not_ok(addr, size, user_addr_max())); \ -}) - -/* - * These are the main single-value transfer routines. They automatically - * use the right size if we just have the right pointer type. - * - * This gets kind of ugly. We want to return _two_ values in "get_user()" - * and yet we don't want to do any pointers, because that is too much - * of a performance impact. Thus we have a few rather ugly macros here, - * and hide all the ugliness from the user. - * - * The "__xxx" versions of the user access functions are versions that - * do not verify the address space, that must have been done previously - * with a separate "access_ok()" call (this is used when we do multiple - * accesses to the same area of user memory). - */ +#include <asm-generic/access_ok.h> extern int __get_user_1(void); extern int __get_user_2(void); extern int __get_user_4(void); extern int __get_user_8(void); +extern int __get_user_nocheck_1(void); +extern int __get_user_nocheck_2(void); +extern int __get_user_nocheck_4(void); +extern int __get_user_nocheck_8(void); extern int __get_user_bad(void); #define __uaccess_begin() stac() @@ -128,31 +43,24 @@ extern int __get_user_bad(void); }) /* - * This is a type: either unsigned long, if the argument fits into - * that type, or otherwise unsigned long long. + * This is the smallest unsigned integer type that can fit a value + * (up to 'long long') */ -#define __inttype(x) \ -__typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) +#define __inttype(x) __typeof__( \ + __typefits(x,char, \ + __typefits(x,short, \ + __typefits(x,int, \ + __typefits(x,long,0ULL))))) + +#define __typefits(x,type,not) \ + __builtin_choose_expr(sizeof(x)<=sizeof(type),(unsigned type)0,not) -/** - * get_user: - Get a simple variable from user space. - * @x: Variable to store result. - * @ptr: Source address, in user space. - * - * Context: User context only. This function may sleep if pagefaults are - * enabled. - * - * This macro copies a single simple variable from user space to kernel - * space. It supports simple types like char and int, but not larger - * data types like structures or arrays. - * - * @ptr must have pointer-to-simple-variable type, and the result of - * dereferencing @ptr must be assignable to @x without a cast. - * - * Returns zero on success, or -EFAULT on error. 
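
A worked example of what the new __inttype()/__typefits() pair above
selects, spelled out as a comment:

	/*
	 * __inttype(*(u8  __user *)p)  -> unsigned char
	 * __inttype(*(u16 __user *)p)  -> unsigned short
	 * __inttype(*(u32 __user *)p)  -> unsigned int
	 * __inttype(*(u64 __user *)p)  -> unsigned long on 64-bit,
	 *                                 unsigned long long on 32-bit
	 *
	 * i.e. always the smallest unsigned type that holds the value,
	 * where the old definition could only pick unsigned long or
	 * unsigned long long.
	 */
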
- * On error, the variable @x is set to zero. - */ /* + * This is used for both get_user() and __get_user() to expand to + * the proper special function call that has odd calling conventions + * due to returning both a value and an error, and that depends on + * the size of the pointer passed in. + * * Careful: we have to cast the result to the type of the pointer * for sign reasons. * @@ -165,29 +73,67 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) * Clang/LLVM cares about the size of the register, but still wants * the base register for something that ends up being a pair. */ -#define get_user(x, ptr) \ +#define do_get_user_call(fn,x,ptr) \ ({ \ int __ret_gu; \ register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \ __chk_user_ptr(ptr); \ - might_fault(); \ - asm volatile("call __get_user_%P4" \ + asm volatile("call __" #fn "_%c[size]" \ : "=a" (__ret_gu), "=r" (__val_gu), \ ASM_CALL_CONSTRAINT \ - : "0" (ptr), "i" (sizeof(*(ptr)))); \ + : "0" (ptr), [size] "i" (sizeof(*(ptr)))); \ + instrument_get_user(__val_gu); \ (x) = (__force __typeof__(*(ptr))) __val_gu; \ __builtin_expect(__ret_gu, 0); \ }) -#define __put_user_x(size, x, ptr, __ret_pu) \ - asm volatile("call __put_user_" #size : "=a" (__ret_pu) \ - : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") +/** + * get_user - Get a simple variable from user space. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Return: zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ +#define get_user(x,ptr) ({ might_fault(); do_get_user_call(get_user,x,ptr); }) +/** + * __get_user - Get a simple variable from user space, with less checking. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Return: zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. 
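+ *
+ * A typical guarded use (illustrative):
+ *
+ *	int val;
+ *
+ *	if (!access_ok(uptr, sizeof(*uptr)) || __get_user(val, uptr))
+ *		return -EFAULT;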
+ */ +#define __get_user(x,ptr) do_get_user_call(get_user_nocheck,x,ptr) #ifdef CONFIG_X86_32 #define __put_user_goto_u64(x, addr, label) \ - asm_volatile_goto("\n" \ + asm goto("\n" \ "1: movl %%eax,0(%1)\n" \ "2: movl %%edx,4(%1)\n" \ _ASM_EXTABLE_UA(1b, %l2) \ @@ -195,39 +141,55 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) : : "A" (x), "r" (addr) \ : : label) -#define __put_user_asm_ex_u64(x, addr) \ - asm volatile("\n" \ - "1: movl %%eax,0(%1)\n" \ - "2: movl %%edx,4(%1)\n" \ - "3:" \ - _ASM_EXTABLE_EX(1b, 2b) \ - _ASM_EXTABLE_EX(2b, 3b) \ - : : "A" (x), "r" (addr)) - -#define __put_user_x8(x, ptr, __ret_pu) \ - asm volatile("call __put_user_8" : "=a" (__ret_pu) \ - : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") #else #define __put_user_goto_u64(x, ptr, label) \ - __put_user_goto(x, ptr, "q", "", "er", label) -#define __put_user_asm_ex_u64(x, addr) \ - __put_user_asm_ex(x, addr, "q", "", "er") -#define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) + __put_user_goto(x, ptr, "q", "er", label) #endif extern void __put_user_bad(void); /* * Strange magic calling convention: pointer in %ecx, - * value in %eax(:%edx), return value in %eax. clobbers %rbx + * value in %eax(:%edx), return value in %ecx. clobbers %rbx */ extern void __put_user_1(void); extern void __put_user_2(void); extern void __put_user_4(void); extern void __put_user_8(void); +extern void __put_user_nocheck_1(void); +extern void __put_user_nocheck_2(void); +extern void __put_user_nocheck_4(void); +extern void __put_user_nocheck_8(void); + +/* + * ptr must be evaluated and assigned to the temporary __ptr_pu before + * the assignment of x to __val_pu, to avoid any function calls + * involved in the ptr expression (possibly implicitly generated due + * to KASAN) from clobbering %ax. + */ +#define do_put_user_call(fn,x,ptr) \ +({ \ + int __ret_pu; \ + void __user *__ptr_pu; \ + register __typeof__(*(ptr)) __val_pu asm("%"_ASM_AX); \ + __typeof__(*(ptr)) __x = (x); /* eval x once */ \ + __typeof__(ptr) __ptr = (ptr); /* eval ptr once */ \ + __chk_user_ptr(__ptr); \ + __ptr_pu = __ptr; \ + __val_pu = __x; \ + asm volatile("call __" #fn "_%c[size]" \ + : "=c" (__ret_pu), \ + ASM_CALL_CONSTRAINT \ + : "0" (__ptr_pu), \ + "r" (__val_pu), \ + [size] "i" (sizeof(*(ptr))) \ + :"ebx"); \ + instrument_put_user(__x, __ptr, sizeof(*(ptr))); \ + __builtin_expect(__ret_pu, 0); \ +}) /** - * put_user: - Write a simple value into user space. + * put_user - Write a simple value into user space. * @x: Value to copy to user space. * @ptr: Destination address, in user space. * @@ -241,214 +203,268 @@ extern void __put_user_8(void); * @ptr must have pointer-to-simple-variable type, and @x must be assignable * to the result of dereferencing @ptr. * - * Returns zero on success, or -EFAULT on error. + * Return: zero on success, or -EFAULT on error. 
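+ *
+ * A typical use (illustrative):
+ *
+ *	if (put_user(val, uptr))
+ *		return -EFAULT;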
*/ -#define put_user(x, ptr) \ -({ \ - int __ret_pu; \ - __typeof__(*(ptr)) __pu_val; \ - __chk_user_ptr(ptr); \ - might_fault(); \ - __pu_val = x; \ - switch (sizeof(*(ptr))) { \ - case 1: \ - __put_user_x(1, __pu_val, ptr, __ret_pu); \ - break; \ - case 2: \ - __put_user_x(2, __pu_val, ptr, __ret_pu); \ - break; \ - case 4: \ - __put_user_x(4, __pu_val, ptr, __ret_pu); \ - break; \ - case 8: \ - __put_user_x8(__pu_val, ptr, __ret_pu); \ - break; \ - default: \ - __put_user_x(X, __pu_val, ptr, __ret_pu); \ - break; \ - } \ - __builtin_expect(__ret_pu, 0); \ -}) +#define put_user(x, ptr) ({ might_fault(); do_put_user_call(put_user,x,ptr); }) + +/** + * __put_user - Write a simple value into user space, with less checking. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Return: zero on success, or -EFAULT on error. + */ +#define __put_user(x, ptr) do_put_user_call(put_user_nocheck,x,ptr) #define __put_user_size(x, ptr, size, label) \ do { \ - __chk_user_ptr(ptr); \ + __typeof__(*(ptr)) __x = (x); /* eval x once */ \ + __typeof__(ptr) __ptr = (ptr); /* eval ptr once */ \ + __chk_user_ptr(__ptr); \ switch (size) { \ case 1: \ - __put_user_goto(x, ptr, "b", "b", "iq", label); \ + __put_user_goto(__x, __ptr, "b", "iq", label); \ break; \ case 2: \ - __put_user_goto(x, ptr, "w", "w", "ir", label); \ + __put_user_goto(__x, __ptr, "w", "ir", label); \ break; \ case 4: \ - __put_user_goto(x, ptr, "l", "k", "ir", label); \ + __put_user_goto(__x, __ptr, "l", "ir", label); \ break; \ case 8: \ - __put_user_goto_u64((__typeof__(*ptr))(x), ptr, label); \ + __put_user_goto_u64(__x, __ptr, label); \ break; \ default: \ __put_user_bad(); \ } \ + instrument_put_user(__x, __ptr, size); \ } while (0) -/* - * This doesn't do __uaccess_begin/end - the exception handling - * around it must do that. 
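
The label-based __put_user_size() above lets callers handle faults with a
goto rather than a return value. A sketch of the usual pattern layered on
top of it, assuming the standard user_access_begin()/unsafe_put_user()/
user_access_end() API (the function name is illustrative):

	static long write_flag_sketch(int __user *uptr, int val)
	{
		if (!user_access_begin(uptr, sizeof(*uptr)))
			return -EFAULT;
		unsafe_put_user(val, uptr, efault);	/* jumps to efault on fault */
		user_access_end();
		return 0;
	efault:
		user_access_end();
		return -EFAULT;
	}
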
- */ -#define __put_user_size_ex(x, ptr, size) \ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#ifdef CONFIG_X86_32 +#define __get_user_asm_u64(x, ptr, label) do { \ + unsigned int __gu_low, __gu_high; \ + const unsigned int __user *__gu_ptr; \ + __gu_ptr = (const void __user *)(ptr); \ + __get_user_asm(__gu_low, __gu_ptr, "l", "=r", label); \ + __get_user_asm(__gu_high, __gu_ptr+1, "l", "=r", label); \ + (x) = ((unsigned long long)__gu_high << 32) | __gu_low; \ +} while (0) +#else +#define __get_user_asm_u64(x, ptr, label) \ + __get_user_asm(x, ptr, "q", "=r", label) +#endif + +#define __get_user_size(x, ptr, size, label) \ do { \ __chk_user_ptr(ptr); \ switch (size) { \ - case 1: \ - __put_user_asm_ex(x, ptr, "b", "b", "iq"); \ + case 1: { \ + unsigned char x_u8__; \ + __get_user_asm(x_u8__, ptr, "b", "=q", label); \ + (x) = x_u8__; \ break; \ + } \ case 2: \ - __put_user_asm_ex(x, ptr, "w", "w", "ir"); \ + __get_user_asm(x, ptr, "w", "=r", label); \ break; \ case 4: \ - __put_user_asm_ex(x, ptr, "l", "k", "ir"); \ + __get_user_asm(x, ptr, "l", "=r", label); \ break; \ case 8: \ - __put_user_asm_ex_u64((__typeof__(*ptr))(x), ptr); \ + __get_user_asm_u64(x, ptr, label); \ break; \ default: \ - __put_user_bad(); \ + (x) = __get_user_bad(); \ } \ + instrument_get_user(x); \ } while (0) +#define __get_user_asm(x, addr, itype, ltype, label) \ + asm_goto_output("\n" \ + "1: mov"itype" %[umem],%[output]\n" \ + _ASM_EXTABLE_UA(1b, %l2) \ + : [output] ltype(x) \ + : [umem] "m" (__m(addr)) \ + : : label) + +#else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT + #ifdef CONFIG_X86_32 -#define __get_user_asm_u64(x, ptr, retval, errret) \ +#define __get_user_asm_u64(x, ptr, retval) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ - asm volatile("\n" \ - "1: movl %2,%%eax\n" \ - "2: movl %3,%%edx\n" \ - "3:\n" \ - ".section .fixup,\"ax\"\n" \ - "4: mov %4,%0\n" \ - " xorl %%eax,%%eax\n" \ - " xorl %%edx,%%edx\n" \ - " jmp 3b\n" \ - ".previous\n" \ - _ASM_EXTABLE_UA(1b, 4b) \ - _ASM_EXTABLE_UA(2b, 4b) \ - : "=r" (retval), "=&A"(x) \ - : "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1), \ - "i" (errret), "0" (retval)); \ + asm volatile("\n" \ + "1: movl %[lowbits],%%eax\n" \ + "2: movl %[highbits],%%edx\n" \ + "3:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 3b, EX_TYPE_EFAULT_REG | \ + EX_FLAG_CLEAR_AX_DX, \ + %[errout]) \ + _ASM_EXTABLE_TYPE_REG(2b, 3b, EX_TYPE_EFAULT_REG | \ + EX_FLAG_CLEAR_AX_DX, \ + %[errout]) \ + : [errout] "=r" (retval), \ + [output] "=&A"(x) \ + : [lowbits] "m" (__m(__ptr)), \ + [highbits] "m" __m(((u32 __user *)(__ptr)) + 1), \ + "0" (retval)); \ }) -#define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad() #else -#define __get_user_asm_u64(x, ptr, retval, errret) \ - __get_user_asm(x, ptr, retval, "q", "", "=r", errret) -#define __get_user_asm_ex_u64(x, ptr) \ - __get_user_asm_ex(x, ptr, "q", "", "=r") +#define __get_user_asm_u64(x, ptr, retval) \ + __get_user_asm(x, ptr, retval, "q") #endif -#define __get_user_size(x, ptr, size, retval, errret) \ +#define __get_user_size(x, ptr, size, retval) \ do { \ + unsigned char x_u8__; \ + \ retval = 0; \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ - __get_user_asm(x, ptr, retval, "b", "b", "=q", errret); \ + __get_user_asm(x_u8__, ptr, retval, "b"); \ + (x) = x_u8__; \ break; \ case 2: \ - __get_user_asm(x, ptr, retval, "w", "w", "=r", errret); \ + __get_user_asm(x, ptr, retval, "w"); \ break; \ case 4: \ - __get_user_asm(x, ptr, retval, "l", "k", "=r", errret); \ + __get_user_asm(x, ptr, retval, "l"); \ break; \ case 8: \ - __get_user_asm_u64(x, ptr, retval, 
errret); \ + __get_user_asm_u64(x, ptr, retval); \ break; \ default: \ (x) = __get_user_bad(); \ } \ } while (0) -#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ +#define __get_user_asm(x, addr, err, itype) \ asm volatile("\n" \ - "1: mov"itype" %2,%"rtype"1\n" \ + "1: mov"itype" %[umem],%[output]\n" \ "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: mov %3,%0\n" \ - " xor"itype" %"rtype"1,%"rtype"1\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ - : "=r" (err), ltype(x) \ - : "m" (__m(addr)), "i" (errret), "0" (err)) - -#define __get_user_asm_nozero(x, addr, err, itype, rtype, ltype, errret) \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG | \ + EX_FLAG_CLEAR_AX, \ + %[errout]) \ + : [errout] "=r" (err), \ + [output] "=a" (x) \ + : [umem] "m" (__m(addr)), \ + "0" (err)) + +#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#ifdef CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT +#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm_goto_output("\n" \ + "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ + _ASM_EXTABLE_UA(1b, %l[label]) \ + : "=@ccz" (success), \ + [ptr] "+m" (*_ptr), \ + [old] "+a" (__old) \ + : [new] ltype (__new) \ + : "memory" \ + : label); \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) + +#ifdef CONFIG_X86_32 +#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm_goto_output("\n" \ + "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ + _ASM_EXTABLE_UA(1b, %l[label]) \ + : "=@ccz" (success), \ + "+A" (__old), \ + [ptr] "+m" (*_ptr) \ + : "b" ((u32)__new), \ + "c" ((u32)((u64)__new >> 32)) \ + : "memory" \ + : label); \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) +#endif // CONFIG_X86_32 +#else // !CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT +#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ + int __err = 0; \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ asm volatile("\n" \ - "1: mov"itype" %2,%"rtype"1\n" \ + "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: mov %3,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ - : "=r" (err), ltype(x) \ - : "m" (__m(addr)), "i" (errret), "0" (err)) + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \ + %[errout]) \ + : "=@ccz" (success), \ + [errout] "+r" (__err), \ + [ptr] "+m" (*_ptr), \ + [old] "+a" (__old) \ + : [new] ltype (__new) \ + : "memory"); \ + if (unlikely(__err)) \ + goto label; \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) +#ifdef CONFIG_X86_32 /* - * This doesn't do __uaccess_begin/end - the exception handling - * around it must do that. + * Unlike the normal CMPXCHG, use output GPR for both success/fail and error. + * There are only six GPRs available and four (EAX, EBX, ECX, and EDX) are + * hardcoded by CMPXCHG8B, leaving only ESI and EDI. If the compiler uses + * both ESI and EDI for the memory operand, compilation will fail if the error + * is an input+output as there will be no register available for input. 
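+ *
+ * (Editor's gloss of the fallback below: the single output GPR encodes
+ * three outcomes.)
+ *
+ *	__result < 0   - faulted; branch to the caller's error label
+ *	__result == 0  - compare failed; *_old is updated to the live value
+ *	__result == 1  - cmpxchg succeeded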
*/ -#define __get_user_size_ex(x, ptr, size) \ -do { \ - __chk_user_ptr(ptr); \ - switch (size) { \ - case 1: \ - __get_user_asm_ex(x, ptr, "b", "b", "=q"); \ - break; \ - case 2: \ - __get_user_asm_ex(x, ptr, "w", "w", "=r"); \ - break; \ - case 4: \ - __get_user_asm_ex(x, ptr, "l", "k", "=r"); \ - break; \ - case 8: \ - __get_user_asm_ex_u64(x, ptr); \ - break; \ - default: \ - (x) = __get_user_bad(); \ - } \ -} while (0) - -#define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ - asm volatile("1: mov"itype" %1,%"rtype"0\n" \ +#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ + int __result; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm volatile("\n" \ + "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ + "mov $0, %[result]\n\t" \ + "setz %b[result]\n" \ "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3:xor"itype" %"rtype"0,%"rtype"0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE_EX(1b, 3b) \ - : ltype(x) : "m" (__m(addr))) - -#define __put_user_nocheck(x, ptr, size) \ -({ \ - __label__ __pu_label; \ - int __pu_err = -EFAULT; \ - __uaccess_begin(); \ - __put_user_size((x), (ptr), (size), __pu_label); \ - __pu_err = 0; \ -__pu_label: \ - __uaccess_end(); \ - __builtin_expect(__pu_err, 0); \ -}) - -#define __get_user_nocheck(x, ptr, size) \ -({ \ - int __gu_err; \ - __inttype(*(ptr)) __gu_val; \ - __uaccess_begin_nospec(); \ - __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ - __uaccess_end(); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ - __builtin_expect(__gu_err, 0); \ -}) + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \ + %[result]) \ + : [result] "=q" (__result), \ + "+A" (__old), \ + [ptr] "+m" (*_ptr) \ + : "b" ((u32)__new), \ + "c" ((u32)((u64)__new >> 32)) \ + : "memory", "cc"); \ + if (unlikely(__result < 0)) \ + goto label; \ + if (unlikely(!__result)) \ + *_old = __old; \ + likely(__result); }) +#endif // CONFIG_X86_32 +#endif // CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT /* FIXME: this hack is definitely wrong -AK */ struct __large_struct { unsigned long buf[100]; }; @@ -459,118 +475,13 @@ struct __large_struct { unsigned long buf[100]; }; * we do not write to any memory gcc knows about, so there are no * aliasing issues. */ -#define __put_user_goto(x, addr, itype, rtype, ltype, label) \ - asm_volatile_goto("\n" \ - "1: mov"itype" %"rtype"0,%1\n" \ - _ASM_EXTABLE_UA(1b, %l2) \ +#define __put_user_goto(x, addr, itype, ltype, label) \ + asm goto("\n" \ + "1: mov"itype" %0,%1\n" \ + _ASM_EXTABLE_UA(1b, %l2) \ : : ltype(x), "m" (__m(addr)) \ : : label) -#define __put_user_failed(x, addr, itype, rtype, ltype, errret) \ - ({ __label__ __puflab; \ - int __pufret = errret; \ - __put_user_goto(x,addr,itype,rtype,ltype,__puflab); \ - __pufret = 0; \ - __puflab: __pufret; }) - -#define __put_user_asm(x, addr, retval, itype, rtype, ltype, errret) do { \ - retval = __put_user_failed(x, addr, itype, rtype, ltype, errret); \ -} while (0) - -#define __put_user_asm_ex(x, addr, itype, rtype, ltype) \ - asm volatile("1: mov"itype" %"rtype"0,%1\n" \ - "2:\n" \ - _ASM_EXTABLE_EX(1b, 2b) \ - : : ltype(x), "m" (__m(addr))) - -/* - * uaccess_try and catch - */ -#define uaccess_try do { \ - current->thread.uaccess_err = 0; \ - __uaccess_begin(); \ - barrier(); - -#define uaccess_try_nospec do { \ - current->thread.uaccess_err = 0; \ - __uaccess_begin_nospec(); \ - -#define uaccess_catch(err) \ - __uaccess_end(); \ - (err) |= (current->thread.uaccess_err ? 
-EFAULT : 0); \ -} while (0) - -/** - * __get_user: - Get a simple variable from user space, with less checking. - * @x: Variable to store result. - * @ptr: Source address, in user space. - * - * Context: User context only. This function may sleep if pagefaults are - * enabled. - * - * This macro copies a single simple variable from user space to kernel - * space. It supports simple types like char and int, but not larger - * data types like structures or arrays. - * - * @ptr must have pointer-to-simple-variable type, and the result of - * dereferencing @ptr must be assignable to @x without a cast. - * - * Caller must check the pointer with access_ok() before calling this - * function. - * - * Returns zero on success, or -EFAULT on error. - * On error, the variable @x is set to zero. - */ - -#define __get_user(x, ptr) \ - __get_user_nocheck((x), (ptr), sizeof(*(ptr))) - -/** - * __put_user: - Write a simple value into user space, with less checking. - * @x: Value to copy to user space. - * @ptr: Destination address, in user space. - * - * Context: User context only. This function may sleep if pagefaults are - * enabled. - * - * This macro copies a single simple value from kernel space to user - * space. It supports simple types like char and int, but not larger - * data types like structures or arrays. - * - * @ptr must have pointer-to-simple-variable type, and @x must be assignable - * to the result of dereferencing @ptr. - * - * Caller must check the pointer with access_ok() before calling this - * function. - * - * Returns zero on success, or -EFAULT on error. - */ - -#define __put_user(x, ptr) \ - __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) - -/* - * {get|put}_user_try and catch - * - * get_user_try { - * get_user_ex(...); - * } get_user_catch(err) - */ -#define get_user_try uaccess_try_nospec -#define get_user_catch(err) uaccess_catch(err) - -#define get_user_ex(x, ptr) do { \ - unsigned long __gue_val; \ - __get_user_size_ex((__gue_val), (ptr), (sizeof(*(ptr)))); \ - (x) = (__force __typeof__(*(ptr)))__gue_val; \ -} while (0) - -#define put_user_try uaccess_try -#define put_user_catch(err) uaccess_catch(err) - -#define put_user_ex(x, ptr) \ - __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) - extern unsigned long copy_from_user_nmi(void *to, const void __user *from, unsigned long n); extern __must_check long @@ -578,102 +489,14 @@ strncpy_from_user(char *dst, const char __user *src, long count); extern __must_check long strnlen_user(const char __user *str, long n); -unsigned long __must_check clear_user(void __user *mem, unsigned long len); -unsigned long __must_check __clear_user(void __user *mem, unsigned long len); - -extern void __cmpxchg_wrong_size(void) - __compiletime_error("Bad argument size for cmpxchg"); +#ifdef CONFIG_ARCH_HAS_COPY_MC +unsigned long __must_check +copy_mc_to_kernel(void *to, const void *from, unsigned len); +#define copy_mc_to_kernel copy_mc_to_kernel -#define __user_atomic_cmpxchg_inatomic(uval, ptr, old, new, size) \ -({ \ - int __ret = 0; \ - __typeof__(ptr) __uval = (uval); \ - __typeof__(*(ptr)) __old = (old); \ - __typeof__(*(ptr)) __new = (new); \ - __uaccess_begin_nospec(); \ - switch (size) { \ - case 1: \ - { \ - asm volatile("\n" \ - "1:\t" LOCK_PREFIX "cmpxchgb %4, %2\n" \ - "2:\n" \ - "\t.section .fixup, \"ax\"\n" \ - "3:\tmov %3, %0\n" \ - "\tjmp 2b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ - : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ - : "i" (-EFAULT), "q" (__new), "1" (__old) \ - : 
"memory" \ - ); \ - break; \ - } \ - case 2: \ - { \ - asm volatile("\n" \ - "1:\t" LOCK_PREFIX "cmpxchgw %4, %2\n" \ - "2:\n" \ - "\t.section .fixup, \"ax\"\n" \ - "3:\tmov %3, %0\n" \ - "\tjmp 2b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ - : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ - : "i" (-EFAULT), "r" (__new), "1" (__old) \ - : "memory" \ - ); \ - break; \ - } \ - case 4: \ - { \ - asm volatile("\n" \ - "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" \ - "2:\n" \ - "\t.section .fixup, \"ax\"\n" \ - "3:\tmov %3, %0\n" \ - "\tjmp 2b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ - : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ - : "i" (-EFAULT), "r" (__new), "1" (__old) \ - : "memory" \ - ); \ - break; \ - } \ - case 8: \ - { \ - if (!IS_ENABLED(CONFIG_X86_64)) \ - __cmpxchg_wrong_size(); \ - \ - asm volatile("\n" \ - "1:\t" LOCK_PREFIX "cmpxchgq %4, %2\n" \ - "2:\n" \ - "\t.section .fixup, \"ax\"\n" \ - "3:\tmov %3, %0\n" \ - "\tjmp 2b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ - : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ - : "i" (-EFAULT), "r" (__new), "1" (__old) \ - : "memory" \ - ); \ - break; \ - } \ - default: \ - __cmpxchg_wrong_size(); \ - } \ - __uaccess_end(); \ - *__uval = __old; \ - __ret; \ -}) - -#define user_atomic_cmpxchg_inatomic(uval, ptr, old, new) \ -({ \ - access_ok((ptr), sizeof(*(ptr))) ? \ - __user_atomic_cmpxchg_inatomic((uval), (ptr), \ - (old), (new), sizeof(*(ptr))) : \ - -EFAULT; \ -}) +unsigned long __must_check +copy_mc_to_user(void __user *to, const void *from, unsigned len); +#endif /* * movsl can be slow when source and dest are not both 8-byte aligned @@ -686,28 +509,13 @@ extern struct movsl_mask { #define ARCH_HAS_NOCACHE_UACCESS 1 -#ifdef CONFIG_X86_32 -# include <asm/uaccess_32.h> -#else -# include <asm/uaccess_64.h> -#endif - -/* - * We rely on the nested NMI work to allow atomic faults from the NMI path; the - * nested NMI paths are careful to preserve CR2. - * - * Caller must use pagefault_enable/disable, or run in interrupt context, - * and also do a uaccess_ok() check - */ -#define __copy_from_user_nmi __copy_from_user_inatomic - /* * The "unsafe" user accesses aren't really "unsafe", but the naming * is a big fat warning: you have to not only do the access_ok() * checking before using them, but you have to surround them with the * user_access_begin/end() pair. 
*/ -static __must_check inline bool user_access_begin(const void __user *ptr, size_t len) +static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len) { if (unlikely(!access_ok(ptr,len))) return 0; @@ -717,17 +525,117 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t #define user_access_begin(a,b) user_access_begin(a,b) #define user_access_end() __uaccess_end() -#define unsafe_put_user(x, ptr, label) \ +#define user_access_save() smap_save() +#define user_access_restore(x) smap_restore(x) + +#define arch_unsafe_put_user(x, ptr, label) \ __put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label) -#define unsafe_get_user(x, ptr, err_label) \ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define arch_unsafe_get_user(x, ptr, err_label) \ +do { \ + __inttype(*(ptr)) __gu_val; \ + __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), err_label); \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ +} while (0) +#else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define arch_unsafe_get_user(x, ptr, err_label) \ do { \ int __gu_err; \ __inttype(*(ptr)) __gu_val; \ - __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT); \ + __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ if (unlikely(__gu_err)) goto err_label; \ } while (0) +#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +extern void __try_cmpxchg_user_wrong_size(void); + +#ifndef CONFIG_X86_32 +#define __try_cmpxchg64_user_asm(_ptr, _oldp, _nval, _label) \ + __try_cmpxchg_user_asm("q", "r", (_ptr), (_oldp), (_nval), _label) +#endif + +/* + * Force the pointer to u<size> to match the size expected by the asm helper. + * clang/LLVM compiles all cases and only discards the unused paths after + * processing errors, which breaks i386 if the pointer is an 8-byte value. + */ +#define unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ + bool __ret; \ + __chk_user_ptr(_ptr); \ + switch (sizeof(*(_ptr))) { \ + case 1: __ret = __try_cmpxchg_user_asm("b", "q", \ + (__force u8 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 2: __ret = __try_cmpxchg_user_asm("w", "r", \ + (__force u16 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 4: __ret = __try_cmpxchg_user_asm("l", "r", \ + (__force u32 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 8: __ret = __try_cmpxchg64_user_asm((__force u64 *)(_ptr), (_oldp),\ + (_nval), _label); \ + break; \ + default: __try_cmpxchg_user_wrong_size(); \ + } \ + __ret; }) + +/* "Returns" 0 on success, 1 on failure, -EFAULT if the access faults. */ +#define __try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ + int __ret = -EFAULT; \ + __uaccess_begin_nospec(); \ + __ret = !unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label); \ +_label: \ + __uaccess_end(); \ + __ret; \ + }) + +/* + * We want the unsafe accessors to always be inlined and use + * the error labels - thus the macro games. 
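+ *
+ * (Editor's illustration: a 15-byte unsafe_copy_to_user() below decays
+ * into one u64, one u32, one u16 and one u8 unsafe_put_user(), each
+ * branching to the same error label on a fault.)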
+ */ +#define unsafe_copy_loop(dst, src, len, type, label) \ + while (len >= sizeof(type)) { \ + unsafe_put_user(*(type *)(src),(type __user *)(dst),label); \ + dst += sizeof(type); \ + src += sizeof(type); \ + len -= sizeof(type); \ + } + +#define unsafe_copy_to_user(_dst,_src,_len,label) \ +do { \ + char __user *__ucu_dst = (_dst); \ + const char *__ucu_src = (_src); \ + size_t __ucu_len = (_len); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \ +} while (0) + +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define arch_get_kernel_nofault(dst, src, type, err_label) \ + __get_user_size(*((type *)(dst)), (__force type __user *)(src), \ + sizeof(type), err_label) +#else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define arch_get_kernel_nofault(dst, src, type, err_label) \ +do { \ + int __kr_err; \ + \ + __get_user_size(*((type *)(dst)), (__force type __user *)(src), \ + sizeof(type), __kr_err); \ + if (unlikely(__kr_err)) \ + goto err_label; \ +} while (0) +#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#define arch_put_kernel_nofault(dst, src, type, err_label) \ + __put_user_size(*((type *)(src)), (__force type __user *)(dst), \ + sizeof(type), err_label) #endif /* _ASM_X86_UACCESS_H */ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index ba2dc1930630..40379a1adbb8 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -23,33 +23,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) static __always_inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { - if (__builtin_constant_p(n)) { - unsigned long ret; - - switch (n) { - case 1: - ret = 0; - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u8 *)to, from, ret, - "b", "b", "=q", 1); - __uaccess_end(); - return ret; - case 2: - ret = 0; - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u16 *)to, from, ret, - "w", "w", "=r", 2); - __uaccess_end(); - return ret; - case 4: - ret = 0; - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u32 *)to, from, ret, - "l", "k", "=r", 4); - __uaccess_end(); - return ret; - } - } return __copy_user_ll(to, (__force const void *)from, n); } @@ -60,4 +33,7 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from, return __copy_from_user_ll_nocache_nozero(to, from, n); } +unsigned long __must_check clear_user(void __user *mem, unsigned long len); +unsigned long __must_check __clear_user(void __user *mem, unsigned long len); + #endif /* _ASM_X86_UACCESS_32_H */ diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index a9d637bc301d..915124011c27 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -11,193 +11,155 @@ #include <asm/alternative.h> #include <asm/cpufeatures.h> #include <asm/page.h> +#include <asm/percpu.h> +#ifdef MODULE + #define runtime_const_ptr(sym) (sym) +#else + #include <asm/runtime-const.h> +#endif +extern unsigned long USER_PTR_MAX; + +#ifdef CONFIG_ADDRESS_MASKING /* - * Copy To/From Userspace + * Mask out tag bits from the address. 
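+ * (Editor's note: with LAM_U57, for instance, the hardware ignores bits
+ * 62:57 of a user pointer; tlbstate_untag_mask clears exactly those bits,
+ * so untagged_addr(ptr | tag << 57) yields the plain ptr again.)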
 */
+static inline unsigned long __untagged_addr(unsigned long addr)
+{
+	asm_inline (ALTERNATIVE("", "and " __percpu_arg([mask]) ", %[addr]",
+				X86_FEATURE_LAM)
+		    : [addr] "+r" (addr)
+		    : [mask] "m" (__my_cpu_var(tlbstate_untag_mask)));
 
-/* Handles exceptions in both to and from, but doesn't do access_ok */
-__must_check unsigned long
-copy_user_enhanced_fast_string(void *to, const void *from, unsigned len);
-__must_check unsigned long
-copy_user_generic_string(void *to, const void *from, unsigned len);
-__must_check unsigned long
-copy_user_generic_unrolled(void *to, const void *from, unsigned len);
+	return addr;
+}
 
-static __always_inline __must_check unsigned long
-copy_user_generic(void *to, const void *from, unsigned len)
+#define untagged_addr(addr) ({ \
+	unsigned long __addr = (__force unsigned long)(addr); \
+	(__force __typeof__(addr))__untagged_addr(__addr); \
+})
+
+static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
+						   unsigned long addr)
 {
-	unsigned ret;
+	mmap_assert_locked(mm);
+	return addr & (mm)->context.untag_mask;
+}
 
-	/*
-	 * If CPU has ERMS feature, use copy_user_enhanced_fast_string.
-	 * Otherwise, if CPU has rep_good feature, use copy_user_generic_string.
-	 * Otherwise, use copy_user_generic_unrolled.
-	 */
-	alternative_call_2(copy_user_generic_unrolled,
-			 copy_user_generic_string,
-			 X86_FEATURE_REP_GOOD,
-			 copy_user_enhanced_fast_string,
-			 X86_FEATURE_ERMS,
-			 ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
-				     "=d" (len)),
-			 "1" (to), "2" (from), "3" (len)
-			 : "memory", "rcx", "r8", "r9", "r10", "r11");
+#define untagged_addr_remote(mm, addr) ({ \
+	unsigned long __addr = (__force unsigned long)(addr); \
+	(__force __typeof__(addr))__untagged_addr_remote(mm, __addr); \
+})
+
+#endif
+
+#define valid_user_address(x) \
+	likely((__force unsigned long)(x) <= runtime_const_ptr(USER_PTR_MAX))
+
+/*
+ * Masking the user address is an alternative to a conditional
+ * user_access_begin that can avoid the fencing. This only works
+ * for dense accesses starting at the address.
+ */
+static inline void __user *mask_user_address(const void __user *ptr)
+{
+	void __user *ret;
+	asm("cmp %1,%0\n\t"
+	    "cmova %1,%0"
+		:"=r" (ret)
+		:"r" (runtime_const_ptr(USER_PTR_MAX)),
+		 "0" (ptr));
 	return ret;
 }
+#define masked_user_access_begin(x) ({ \
+	auto __masked_ptr = (x); \
+	__masked_ptr = mask_user_address(__masked_ptr); \
+	__uaccess_begin(); __masked_ptr; })
 
-static __always_inline __must_check unsigned long
-copy_to_user_mcsafe(void *to, const void *from, unsigned len)
+/*
+ * User pointers can have tag bits on x86-64. This scheme tolerates
+ * arbitrary values in those bits rather than masking them off.
+ *
+ * Enforce two rules:
+ * 1. 'ptr' must be in the user part of the address space
+ * 2. 'ptr+size' must not overflow into kernel addresses
+ *
+ * Note that we always have at least one guard page between the
+ * max user address and the non-canonical gap, allowing us to
+ * ignore small sizes entirely.
+ *
+ * In fact, we could probably remove the size check entirely, since
+ * any kernel accesses will be in increasing address order starting
+ * at 'ptr'.
+ *
+ * That's a separate optimization, for now just handle the small
+ * constant case.
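+ *
+ * (Editor's illustration: for a compile-time size <= PAGE_SIZE only 'ptr'
+ * itself is range-checked and the guard page absorbs the tail; for larger
+ * or non-constant sizes 'ptr + size' is checked together with an explicit
+ * overflow test, as __access_ok() below does.)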
+ */ +static inline bool __access_ok(const void __user *ptr, unsigned long size) { - unsigned long ret; + if (__builtin_constant_p(size <= PAGE_SIZE) && size <= PAGE_SIZE) { + return valid_user_address(ptr); + } else { + unsigned long sum = size + (__force unsigned long)ptr; + + return valid_user_address(sum) && sum >= (__force unsigned long)ptr; + } +} +#define __access_ok __access_ok - __uaccess_begin(); +/* + * Copy To/From Userspace + */ + +/* Handles exceptions in both to and from, but doesn't do access_ok */ +__must_check unsigned long +rep_movs_alternative(void *to, const void *from, unsigned len); + +static __always_inline __must_check unsigned long +copy_user_generic(void *to, const void *from, unsigned long len) +{ + stac(); /* - * Note, __memcpy_mcsafe() is explicitly used since it can - * handle exceptions / faults. memcpy_mcsafe() may fall back to - * memcpy() which lacks this handling. + * If CPU has FSRM feature, use 'rep movs'. + * Otherwise, use rep_movs_alternative. */ - ret = __memcpy_mcsafe(to, from, len); - __uaccess_end(); - return ret; + asm volatile( + "1:\n\t" + ALTERNATIVE("rep movsb", + "call rep_movs_alternative", ALT_NOT(X86_FEATURE_FSRM)) + "2:\n" + _ASM_EXTABLE_UA(1b, 2b) + :"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT + : : "memory", "rax"); + clac(); + return len; } static __always_inline __must_check unsigned long raw_copy_from_user(void *dst, const void __user *src, unsigned long size) { - int ret = 0; - - if (!__builtin_constant_p(size)) - return copy_user_generic(dst, (__force void *)src, size); - switch (size) { - case 1: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src, - ret, "b", "b", "=q", 1); - __uaccess_end(); - return ret; - case 2: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src, - ret, "w", "w", "=r", 2); - __uaccess_end(); - return ret; - case 4: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src, - ret, "l", "k", "=r", 4); - __uaccess_end(); - return ret; - case 8: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 8); - __uaccess_end(); - return ret; - case 10: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 10); - if (likely(!ret)) - __get_user_asm_nozero(*(u16 *)(8 + (char *)dst), - (u16 __user *)(8 + (char __user *)src), - ret, "w", "w", "=r", 2); - __uaccess_end(); - return ret; - case 16: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 16); - if (likely(!ret)) - __get_user_asm_nozero(*(u64 *)(8 + (char *)dst), - (u64 __user *)(8 + (char __user *)src), - ret, "q", "", "=r", 8); - __uaccess_end(); - return ret; - default: - return copy_user_generic(dst, (__force void *)src, size); - } + return copy_user_generic(dst, (__force void *)src, size); } static __always_inline __must_check unsigned long raw_copy_to_user(void __user *dst, const void *src, unsigned long size) { - int ret = 0; - - if (!__builtin_constant_p(size)) - return copy_user_generic((__force void *)dst, src, size); - switch (size) { - case 1: - __uaccess_begin(); - __put_user_asm(*(u8 *)src, (u8 __user *)dst, - ret, "b", "b", "iq", 1); - __uaccess_end(); - return ret; - case 2: - __uaccess_begin(); - __put_user_asm(*(u16 *)src, (u16 __user *)dst, - ret, "w", "w", "ir", 2); - __uaccess_end(); - return ret; - case 4: - __uaccess_begin(); - __put_user_asm(*(u32 *)src, (u32 __user *)dst, - ret, "l", "k", 
"ir", 4); - __uaccess_end(); - return ret; - case 8: - __uaccess_begin(); - __put_user_asm(*(u64 *)src, (u64 __user *)dst, - ret, "q", "", "er", 8); - __uaccess_end(); - return ret; - case 10: - __uaccess_begin(); - __put_user_asm(*(u64 *)src, (u64 __user *)dst, - ret, "q", "", "er", 10); - if (likely(!ret)) { - asm("":::"memory"); - __put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst, - ret, "w", "w", "ir", 2); - } - __uaccess_end(); - return ret; - case 16: - __uaccess_begin(); - __put_user_asm(*(u64 *)src, (u64 __user *)dst, - ret, "q", "", "er", 16); - if (likely(!ret)) { - asm("":::"memory"); - __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst, - ret, "q", "", "er", 8); - } - __uaccess_end(); - return ret; - default: - return copy_user_generic((__force void *)dst, src, size); - } -} - -static __always_inline __must_check -unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigned long size) -{ - return copy_user_generic((__force void *)dst, - (__force void *)src, size); + return copy_user_generic((__force void *)dst, src, size); } -extern long __copy_user_nocache(void *dst, const void __user *src, - unsigned size, int zerorest); - +extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size); extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size); -extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset, - size_t len); static inline int __copy_from_user_inatomic_nocache(void *dst, const void __user *src, unsigned size) { + long ret; kasan_check_write(dst, size); - return __copy_user_nocache(dst, src, size, 0); + stac(); + ret = __copy_user_nocache(dst, src, size); + clac(); + return ret; } static inline int @@ -207,10 +169,40 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) return __copy_user_flushcache(dst, src, size); } -unsigned long -copy_user_handle_tail(char *to, char *from, unsigned len); +/* + * Zero Userspace. + */ -unsigned long -mcsafe_handle_tail(char *to, char *from, unsigned len); +__must_check unsigned long +rep_stos_alternative(void __user *addr, unsigned long len); + +static __always_inline __must_check unsigned long __clear_user(void __user *addr, unsigned long size) +{ + might_fault(); + stac(); + /* + * No memory constraint because it doesn't change any memory gcc + * knows about. 
+ */ + asm volatile( + "1:\n\t" + ALTERNATIVE("rep stosb", + "call rep_stos_alternative", ALT_NOT(X86_FEATURE_FSRS)) + "2:\n" + _ASM_EXTABLE_UA(1b, 2b) + : "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT + : "a" (0)); + + clac(); + + return size; +} + +static __always_inline unsigned long clear_user(void __user *to, unsigned long n) +{ + if (__access_ok(to, n)) + return __clear_user(to, n); + return n; +} #endif /* _ASM_X86_UACCESS_64_H */ diff --git a/arch/x86/include/asm/umip.h b/arch/x86/include/asm/umip.h index db43f2a0d92c..aeed98c3c9e1 100644 --- a/arch/x86/include/asm/umip.h +++ b/arch/x86/include/asm/umip.h @@ -4,9 +4,9 @@ #include <linux/types.h> #include <asm/ptrace.h> -#ifdef CONFIG_X86_INTEL_UMIP +#ifdef CONFIG_X86_UMIP bool fixup_umip_exception(struct pt_regs *regs); #else static inline bool fixup_umip_exception(struct pt_regs *regs) { return false; } -#endif /* CONFIG_X86_INTEL_UMIP */ +#endif /* CONFIG_X86_UMIP */ #endif /* _ASM_X86_UMIP_H */ diff --git a/arch/x86/include/asm/unaccepted_memory.h b/arch/x86/include/asm/unaccepted_memory.h new file mode 100644 index 000000000000..f5937e9866ac --- /dev/null +++ b/arch/x86/include/asm/unaccepted_memory.h @@ -0,0 +1,27 @@ +#ifndef _ASM_X86_UNACCEPTED_MEMORY_H +#define _ASM_X86_UNACCEPTED_MEMORY_H + +#include <linux/efi.h> +#include <asm/tdx.h> +#include <asm/sev.h> + +static inline void arch_accept_memory(phys_addr_t start, phys_addr_t end) +{ + /* Platform-specific memory-acceptance call goes here */ + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) { + if (!tdx_accept_memory(start, end)) + panic("TDX: Failed to accept memory\n"); + } else if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) { + snp_accept_memory(start, end); + } else { + panic("Cannot accept memory: unknown platform\n"); + } +} + +static inline struct efi_unaccepted_memory *efi_get_unaccepted_table(void) +{ + if (efi.unaccepted == EFI_INVALID_TABLE_ADDR) + return NULL; + return __va(efi.unaccepted); +} +#endif diff --git a/arch/x86/include/asm/unaligned.h b/arch/x86/include/asm/unaligned.h deleted file mode 100644 index 9c754a7447aa..000000000000 --- a/arch/x86/include/asm/unaligned.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_UNALIGNED_H -#define _ASM_X86_UNALIGNED_H - -/* - * The x86 can do unaligned accesses itself. 
- */ - -#include <linux/unaligned/access_ok.h> -#include <linux/unaligned/generic.h> - -#define get_unaligned __get_unaligned_le -#define put_unaligned __put_unaligned_le - -#endif /* _ASM_X86_UNALIGNED_H */ diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index dc4ed8bc2382..6c9e5bdd3916 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -5,12 +5,6 @@ #include <uapi/asm/unistd.h> -# ifdef CONFIG_X86_X32_ABI -# define __SYSCALL_MASK (~(__X32_SYSCALL_BIT)) -# else -# define __SYSCALL_MASK (~0) -# endif - # ifdef CONFIG_X86_32 # include <asm/unistd_32.h> @@ -19,19 +13,27 @@ # define __ARCH_WANT_SYS_OLD_MMAP # define __ARCH_WANT_SYS_OLD_SELECT +# define IA32_NR_syscalls (__NR_syscalls) + # else # include <asm/unistd_64.h> # include <asm/unistd_64_x32.h> -# define __ARCH_WANT_COMPAT_SYS_TIME -# define __ARCH_WANT_SYS_UTIME32 +# include <asm/unistd_32_ia32.h> +# define __ARCH_WANT_SYS_TIME +# define __ARCH_WANT_SYS_UTIME +# define __ARCH_WANT_COMPAT_STAT # define __ARCH_WANT_COMPAT_SYS_PREADV64 # define __ARCH_WANT_COMPAT_SYS_PWRITEV64 # define __ARCH_WANT_COMPAT_SYS_PREADV64V2 # define __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 +# define X32_NR_syscalls (__NR_x32_syscalls) +# define IA32_NR_syscalls (__NR_ia32_syscalls) # endif +# define NR_syscalls (__NR_syscalls) + # define __ARCH_WANT_NEW_STAT # define __ARCH_WANT_OLD_READDIR # define __ARCH_WANT_OLD_STAT @@ -48,8 +50,8 @@ # define __ARCH_WANT_SYS_SIGPENDING # define __ARCH_WANT_SYS_SIGPROCMASK # define __ARCH_WANT_SYS_SOCKETCALL -# define __ARCH_WANT_SYS_TIME -# define __ARCH_WANT_SYS_UTIME +# define __ARCH_WANT_SYS_TIME32 +# define __ARCH_WANT_SYS_UTIME32 # define __ARCH_WANT_SYS_WAITPID # define __ARCH_WANT_SYS_FORK # define __ARCH_WANT_SYS_VFORK diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index 1f86e1b0a5cd..7cede4dc21f0 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -4,6 +4,7 @@ #include <linux/sched.h> #include <linux/ftrace.h> +#include <linux/rethook.h> #include <asm/ptrace.h> #include <asm/stacktrace.h> @@ -15,14 +16,23 @@ struct unwind_state { unsigned long stack_mask; struct task_struct *task; int graph_idx; +#if defined(CONFIG_RETHOOK) + struct llist_node *kr_cur; +#endif bool error; #if defined(CONFIG_UNWINDER_ORC) bool signal, full_regs; unsigned long sp, bp, ip; - struct pt_regs *regs; + struct pt_regs *regs, *prev_regs; #elif defined(CONFIG_UNWINDER_FRAME_POINTER) bool got_irq; unsigned long *bp, *orig_sp, ip; + /* + * If non-NULL: The current frame is incomplete and doesn't contain a + * valid BP. When looking for the next frame, use this instead of the + * non-existent saved BP. + */ + unsigned long *next_bp; struct pt_regs *regs; #else unsigned long *sp; @@ -93,6 +103,30 @@ void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {} #endif +static inline +unsigned long unwind_recover_rethook(struct unwind_state *state, + unsigned long addr, unsigned long *addr_p) +{ +#ifdef CONFIG_RETHOOK + if (is_rethook_trampoline(addr)) + return rethook_find_ret_addr(state->task, (unsigned long)addr_p, + &state->kr_cur); +#endif + return addr; +} + +/* Recover the return address modified by rethook and ftrace_graph. 
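+ * (Editor's gloss: ftrace_graph_ret_addr() first undoes any graph-tracer
+ * patching of the on-stack return address, then unwind_recover_rethook()
+ * maps a rethook trampoline address back to the real return site.)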
*/ +static inline +unsigned long unwind_recover_ret_addr(struct unwind_state *state, + unsigned long addr, unsigned long *addr_p) +{ + unsigned long ret; + + ret = ftrace_graph_ret_addr(state->task, &state->graph_idx, + addr, addr_p); + return unwind_recover_rethook(state, ret, addr_p); +} + /* * This disables KASAN checking when reading a value from another task's stack, * since the other task could be running on another CPU and could have poisoned diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 0bcdb1279361..8f4579c5a6f8 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -1,51 +1,26 @@ #ifndef _ASM_X86_UNWIND_HINTS_H #define _ASM_X86_UNWIND_HINTS_H +#include <linux/objtool.h> + #include "orc_types.h" -#ifdef __ASSEMBLY__ - -/* - * In asm, there are two kinds of code: normal C-type callable functions and - * the rest. The normal callable functions can be called by other code, and - * don't do anything unusual with the stack. Such normal callable functions - * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this - * category. In this case, no special debugging annotations are needed because - * objtool can automatically generate the ORC data for the ORC unwinder to read - * at runtime. - * - * Anything which doesn't fall into the above category, such as syscall and - * interrupt handlers, tends to not be called directly by other functions, and - * often does unusual non-C-function-type things with the stack pointer. Such - * code needs to be annotated such that objtool can understand it. The - * following CFI hint macros are for this type of code. - * - * These macros provide hints to objtool about the state of the stack at each - * instruction. Objtool starts from the hints and follows the code flow, - * making automatic CFI adjustments when it sees pushes and pops, filling out - * the debuginfo as necessary. It will also warn if it sees any - * inconsistencies. - */ -.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL end=0 -#ifdef CONFIG_STACK_VALIDATION -.Lunwind_hint_ip_\@: - .pushsection .discard.unwind_hints - /* struct unwind_hint */ - .long .Lunwind_hint_ip_\@ - . 
- .short \sp_offset - .byte \sp_reg - .byte \type - .byte \end - .balign 4 - .popsection -#endif +#ifdef __ASSEMBLER__ + +.macro UNWIND_HINT_END_OF_STACK + UNWIND_HINT type=UNWIND_HINT_TYPE_END_OF_STACK +.endm + +.macro UNWIND_HINT_UNDEFINED + UNWIND_HINT type=UNWIND_HINT_TYPE_UNDEFINED .endm -.macro UNWIND_HINT_EMPTY - UNWIND_HINT sp_reg=ORC_REG_UNDEFINED end=1 +.macro UNWIND_HINT_ENTRY + VALIDATE_UNRET_BEGIN + UNWIND_HINT_END_OF_STACK .endm -.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0 +.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 signal=1 .if \base == %rsp .if \indirect .set sp_reg, ORC_REG_SP_INDIRECT @@ -66,44 +41,53 @@ .set sp_offset, \offset - .if \iret - .set type, ORC_TYPE_REGS_IRET + .if \partial + .set type, UNWIND_HINT_TYPE_REGS_PARTIAL .elseif \extra == 0 - .set type, ORC_TYPE_REGS_IRET + .set type, UNWIND_HINT_TYPE_REGS_PARTIAL .set sp_offset, \offset + (16*8) .else - .set type, ORC_TYPE_REGS + .set type, UNWIND_HINT_TYPE_REGS .endif - UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type + UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type signal=\signal +.endm + +.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0 signal=1 + UNWIND_HINT_REGS base=\base offset=\offset partial=1 signal=\signal .endm -.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0 - UNWIND_HINT_REGS base=\base offset=\offset iret=1 +.macro UNWIND_HINT_IRET_ENTRY base=%rsp offset=0 signal=1 + VALIDATE_UNRET_BEGIN + UNWIND_HINT_IRET_REGS base=\base offset=\offset signal=\signal .endm -.macro UNWIND_HINT_FUNC sp_offset=8 - UNWIND_HINT sp_offset=\sp_offset +.macro UNWIND_HINT_FUNC + UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC .endm -#else /* !__ASSEMBLY__ */ +.macro UNWIND_HINT_SAVE + UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE +.endm + +.macro UNWIND_HINT_RESTORE + UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE +.endm + +#else + +#define UNWIND_HINT_UNDEFINED \ + UNWIND_HINT(UNWIND_HINT_TYPE_UNDEFINED, 0, 0, 0) -#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ - "987: \n\t" \ - ".pushsection .discard.unwind_hints\n\t" \ - /* struct unwind_hint */ \ - ".long 987b - .\n\t" \ - ".short " __stringify(sp_offset) "\n\t" \ - ".byte " __stringify(sp_reg) "\n\t" \ - ".byte " __stringify(type) "\n\t" \ - ".byte " __stringify(end) "\n\t" \ - ".balign 4 \n\t" \ - ".popsection\n\t" +#define UNWIND_HINT_FUNC \ + UNWIND_HINT(UNWIND_HINT_TYPE_FUNC, ORC_REG_SP, 8, 0) -#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE, 0) +#define UNWIND_HINT_SAVE \ + UNWIND_HINT(UNWIND_HINT_TYPE_SAVE, 0, 0, 0) -#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE, 0) +#define UNWIND_HINT_RESTORE \ + UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_UNWIND_HINTS_H */ diff --git a/arch/x86/include/asm/unwind_user.h b/arch/x86/include/asm/unwind_user.h new file mode 100644 index 000000000000..12064284bc4e --- /dev/null +++ b/arch/x86/include/asm/unwind_user.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_UNWIND_USER_H +#define _ASM_X86_UNWIND_USER_H + +#ifdef CONFIG_HAVE_UNWIND_USER_FP + +#include <asm/ptrace.h> +#include <asm/uprobes.h> + +#define ARCH_INIT_USER_FP_FRAME(ws) \ + .cfa_off = 2*(ws), \ + .ra_off = -1*(ws), \ + .fp_off = -2*(ws), \ + .use_fp = true, + +#define ARCH_INIT_USER_FP_ENTRY_FRAME(ws) \ + .cfa_off = 1*(ws), \ + .ra_off = -1*(ws), \ + .fp_off = 0, \ + .use_fp = false, + +static inline int unwind_user_word_size(struct pt_regs 
*regs) +{ + /* We can't unwind VM86 stacks */ + if (regs->flags & X86_VM_MASK) + return 0; +#ifdef CONFIG_X86_64 + if (!user_64bit_mode(regs)) + return sizeof(int); +#endif + return sizeof(long); +} + +static inline bool unwind_user_at_function_start(struct pt_regs *regs) +{ + return is_uprobe_at_func_entry(regs); +} + +#endif /* CONFIG_HAVE_UNWIND_USER_FP */ + +#endif /* _ASM_X86_UNWIND_USER_H */ diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index d8bfa98fca98..362210c79998 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -1,22 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_UPROBES_H #define _ASM_UPROBES_H /* * User-space Probes (UProbes) for x86 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright (C) IBM Corporation, 2008-2011 * Authors: * Srikar Dronamraju @@ -33,6 +20,11 @@ typedef u8 uprobe_opcode_t; #define UPROBE_SWBP_INSN 0xcc #define UPROBE_SWBP_INSN_SIZE 1 +enum { + ARCH_UPROBE_FLAG_CAN_OPTIMIZE = 0, + ARCH_UPROBE_FLAG_OPTIMIZE_FAIL = 1, +}; + struct uprobe_xol_ops; struct arch_uprobe { @@ -58,6 +50,8 @@ struct arch_uprobe { u8 ilen; } push; }; + + unsigned long flags; }; struct arch_uprobe_task { @@ -68,4 +62,13 @@ struct arch_uprobe_task { unsigned int saved_tf; }; +#ifdef CONFIG_UPROBES +extern bool is_uprobe_at_func_entry(struct pt_regs *regs); +#else +static bool is_uprobe_at_func_entry(struct pt_regs *regs) +{ + return false; +} +#endif /* CONFIG_UPROBES */ + #endif /* _ASM_UPROBES_H */ diff --git a/arch/x86/include/asm/user_32.h b/arch/x86/include/asm/user_32.h index d72c3d66e94f..8963915e533f 100644 --- a/arch/x86/include/asm/user_32.h +++ b/arch/x86/include/asm/user_32.h @@ -124,9 +124,5 @@ struct user{ char u_comm[32]; /* User command that was responsible */ int u_debugreg[8]; }; -#define NBPG PAGE_SIZE -#define UPAGES 1 -#define HOST_TEXT_START_ADDR (u.start_code) -#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) #endif /* _ASM_X86_USER_32_H */ diff --git a/arch/x86/include/asm/user_64.h b/arch/x86/include/asm/user_64.h index db909923611c..1dd10f07ccd6 100644 --- a/arch/x86/include/asm/user_64.h +++ b/arch/x86/include/asm/user_64.h @@ -130,9 +130,5 @@ struct user { unsigned long error_code; /* CPU error code or 0 */ unsigned long fault_address; /* CR3 or 0 */ }; -#define NBPG PAGE_SIZE -#define UPAGES 1 -#define HOST_TEXT_START_ADDR (u.start_code) -#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) #endif /* _ASM_X86_USER_64_H */ diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index e652a7cc6186..6989b824fd32 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h @@ -1,27 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_UV_BIOS_H #define _ASM_X86_UV_BIOS_H /* * UV BIOS layer definitions. 
* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. - * Copyright (c) Russ Anderson <rja@sgi.com> + * (C) Copyright 2020 Hewlett Packard Enterprise Development LP + * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved. + * Copyright (c) Russ Anderson <rja@sgi.com> */ +#include <linux/efi.h> #include <linux/rtc.h> /* @@ -40,6 +29,20 @@ enum uv_bios_cmd { UV_BIOS_SET_LEGACY_VGA_TARGET }; +#define UV_BIOS_EXTRA 0x10000 +#define UV_BIOS_GET_PCI_TOPOLOGY 0x10001 +#define UV_BIOS_GET_GEOINFO 0x10003 + +#define UV_BIOS_EXTRA_OP_MEM_COPYIN 0x1000 +#define UV_BIOS_EXTRA_OP_MEM_COPYOUT 0x2000 +#define UV_BIOS_EXTRA_OP_MASK 0x0fff +#define UV_BIOS_EXTRA_GET_HEAPSIZE 1 +#define UV_BIOS_EXTRA_INSTALL_HEAP 2 +#define UV_BIOS_EXTRA_MASTER_NASID 3 +#define UV_BIOS_EXTRA_OBJECT_COUNT (10|UV_BIOS_EXTRA_OP_MEM_COPYOUT) +#define UV_BIOS_EXTRA_ENUM_OBJECTS (12|UV_BIOS_EXTRA_OP_MEM_COPYOUT) +#define UV_BIOS_EXTRA_ENUM_PORTS (13|UV_BIOS_EXTRA_OP_MEM_COPYOUT) + /* * Status values returned from a BIOS call. */ @@ -48,7 +51,8 @@ enum { BIOS_STATUS_SUCCESS = 0, BIOS_STATUS_UNIMPLEMENTED = -ENOSYS, BIOS_STATUS_EINVAL = -EINVAL, - BIOS_STATUS_UNAVAIL = -EBUSY + BIOS_STATUS_UNAVAIL = -EBUSY, + BIOS_STATUS_ABORT = -EINTR, }; /* Address map parameters */ @@ -83,18 +87,27 @@ struct uv_gam_range_entry { u32 limit; /* PA bits 56:26 (UV_GAM_RANGE_SHFT) */ }; +#define UV_AT_SIZE 8 /* 7 character arch type + NULL char */ +struct uv_arch_type_entry { + char archtype[UV_AT_SIZE]; +}; + #define UV_SYSTAB_SIG "UVST" -#define UV_SYSTAB_VERSION_1 1 /* UV1/2/3 BIOS version */ +#define UV_SYSTAB_VERSION_1 1 /* UV2/3 BIOS version */ #define UV_SYSTAB_VERSION_UV4 0x400 /* UV4 BIOS base version */ #define UV_SYSTAB_VERSION_UV4_1 0x401 /* + gpa_shift */ #define UV_SYSTAB_VERSION_UV4_2 0x402 /* + TYPE_NVRAM/WINDOW/MBOX */ #define UV_SYSTAB_VERSION_UV4_3 0x403 /* - GAM Range PXM Value */ #define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_3 +#define UV_SYSTAB_VERSION_UV5 0x500 /* UV5 GAM base version */ +#define UV_SYSTAB_VERSION_UV5_LATEST UV_SYSTAB_VERSION_UV5 + #define UV_SYSTAB_TYPE_UNUSED 0 /* End of table (offset == 0) */ #define UV_SYSTAB_TYPE_GAM_PARAMS 1 /* GAM PARAM conversions */ #define UV_SYSTAB_TYPE_GAM_RNG_TBL 2 /* GAM entry table */ -#define UV_SYSTAB_TYPE_MAX 3 +#define UV_SYSTAB_TYPE_ARCH_TYPE 3 /* UV arch type */ +#define UV_SYSTAB_TYPE_MAX 4 /* * The UV system table describes specific firmware @@ -103,7 +116,8 @@ struct uv_gam_range_entry { struct uv_systab { char signature[4]; /* must be UV_SYSTAB_SIG */ u32 revision; /* distinguish different firmware revs */ - u64 function; /* BIOS runtime callback function ptr */ + u64 (__efiapi *function)(enum uv_bios_cmd, ...); + /* BIOS runtime callback function ptr */ u32 size; /* systab size (starting with _VERSION_UV4) */ 
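+	/* (Editor's note: 'entry' below is an old-style variable-length
+	 * trailer; entries follow until one of type UV_SYSTAB_TYPE_UNUSED,
+	 * i.e. offset == 0, and 'size' bounds the whole table.) */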
struct { u32 type:8; /* type of entry */ @@ -111,6 +125,32 @@ struct uv_systab { } entry[1]; /* additional entries follow */ }; extern struct uv_systab *uv_systab; + +#define UV_BIOS_MAXSTRING 128 +struct uv_bios_hub_info { + unsigned int id; + union { + struct { + unsigned long long this_part:1; + unsigned long long is_shared:1; + unsigned long long is_disabled:1; + } fields; + struct { + unsigned long long flags; + unsigned long long reserved; + } b; + } f; + char name[UV_BIOS_MAXSTRING]; + char location[UV_BIOS_MAXSTRING]; + unsigned int ports; +}; + +struct uv_bios_port_info { + unsigned int port; + unsigned int conn_id; + unsigned int conn_port; +}; + /* (... end of definitions from UV BIOS ...) */ enum { @@ -135,13 +175,6 @@ enum uv_memprotect { UV_MEMPROT_ALLOW_RW }; -/* - * bios calls have 6 parameters - */ -extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64); -extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64); -extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64); - extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *, long *); extern s64 uv_bios_freq_base(u64, u64 *); extern int uv_bios_mq_watchlist_alloc(unsigned long, unsigned int, @@ -151,11 +184,17 @@ extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect); extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *); extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus); -#ifdef CONFIG_EFI -extern void uv_bios_init(void); -#else -void uv_bios_init(void) { } -#endif +extern s64 uv_bios_get_master_nasid(u64 sz, u64 *nasid); +extern s64 uv_bios_get_heapsize(u64 nasid, u64 sz, u64 *heap_sz); +extern s64 uv_bios_install_heap(u64 nasid, u64 sz, u64 *heap); +extern s64 uv_bios_obj_count(u64 nasid, u64 sz, u64 *objcnt); +extern s64 uv_bios_enum_objs(u64 nasid, u64 sz, u64 *objbuf); +extern s64 uv_bios_enum_ports(u64 nasid, u64 obj_id, u64 sz, u64 *portbuf); +extern s64 uv_bios_get_geoinfo(u64 nasid, u64 sz, u64 *geo); +extern s64 uv_bios_get_pci_topology(u64 sz, u64 *buf); + +extern int uv_bios_init(void); +extern unsigned long get_uv_systab_phys(bool msg); extern unsigned long sn_rtc_cycles_per_second; extern int uv_type; @@ -163,8 +202,14 @@ extern long sn_partition_id; extern long sn_coherency_id; extern long sn_region_size; extern long system_serial_number; -#define uv_partition_coherence_id() (sn_coherency_id) +extern ssize_t uv_get_archtype(char *buf, int len); +extern int uv_get_hubless_system(void); extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ +/* + * EFI runtime lock; cf. 
firmware/efi/runtime-wrappers.c for details + */ +extern struct semaphore __efi_uv_runtime_lock; + #endif /* _ASM_X86_UV_BIOS_H */ diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index e60c45fd3679..648eb23fe7f0 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -2,41 +2,42 @@ #ifndef _ASM_X86_UV_UV_H #define _ASM_X86_UV_UV_H -#include <asm/tlbflush.h> - -enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; - -struct cpumask; -struct mm_struct; +enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC}; #ifdef CONFIG_X86_UV #include <linux/efi.h> +#define UV_PROC_NODE "sgi_uv" + +static inline int uv(int uvtype) +{ + /* uv(0) is "any" */ + if (uvtype >= 0 && uvtype <= 30) + return 1 << uvtype; + return 1; +} + +extern unsigned long uv_systab_phys; + extern enum uv_system_type get_uv_system_type(void); static inline bool is_early_uv_system(void) { - return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab); + return uv_systab_phys && uv_systab_phys != EFI_INVALID_TABLE_ADDR; } extern int is_uv_system(void); -extern int is_uv_hubless(void); +extern int is_uv_hubbed(int uvtype); extern void uv_cpu_init(void); extern void uv_nmi_init(void); extern void uv_system_init(void); -extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info); -#else /* X86_UV */ +#else /* !X86_UV */ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } static inline bool is_early_uv_system(void) { return 0; } static inline int is_uv_system(void) { return 0; } -static inline int is_uv_hubless(void) { return 0; } +static inline int is_uv_hubbed(int uv) { return 0; } static inline void uv_cpu_init(void) { } static inline void uv_system_init(void) { } -static inline const struct cpumask * -uv_flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info) -{ return cpumask; } #endif /* X86_UV */ diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h deleted file mode 100644 index 7803114aa140..000000000000 --- a/arch/x86/include/asm/uv/uv_bau.h +++ /dev/null @@ -1,861 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * SGI UV Broadcast Assist Unit definitions - * - * Copyright (C) 2008-2011 Silicon Graphics, Inc. All rights reserved. - */ - -#ifndef _ASM_X86_UV_UV_BAU_H -#define _ASM_X86_UV_UV_BAU_H - -#include <linux/bitmap.h> -#define BITSPERBYTE 8 - -/* - * Broadcast Assist Unit messaging structures - * - * Selective Broadcast activations are induced by software action - * specifying a particular 8-descriptor "set" via a 6-bit index written - * to an MMR. - * Thus there are 64 unique 512-byte sets of SB descriptors - one set for - * each 6-bit index value. These descriptor sets are mapped in sequence - * starting with set 0 located at the address specified in the - * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512, - * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. - * - * We will use one set for sending BAU messages from each of the - * cpu's on the uvhub. - * - * TLB shootdown will use the first of the 8 descriptors of each set. - * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). - */ - -#define MAX_CPUS_PER_UVHUB 128 -#define MAX_CPUS_PER_SOCKET 64 -#define ADP_SZ 64 /* hardware-provided max. 
*/ -#define UV_CPUS_PER_AS 32 /* hardware-provided max. */ -#define ITEMS_PER_DESC 8 -/* the 'throttle' to prevent the hardware stay-busy bug */ -#define MAX_BAU_CONCURRENT 3 -#define UV_ACT_STATUS_MASK 0x3 -#define UV_ACT_STATUS_SIZE 2 -#define UV_DISTRIBUTION_SIZE 256 -#define UV_SW_ACK_NPENDING 8 -#define UV1_NET_ENDPOINT_INTD 0x38 -#define UV2_NET_ENDPOINT_INTD 0x28 -#define UV_NET_ENDPOINT_INTD (is_uv1_hub() ? \ - UV1_NET_ENDPOINT_INTD : UV2_NET_ENDPOINT_INTD) -#define UV_PAYLOADQ_GNODE_SHIFT 49 -#define UV_PTC_BASENAME "sgi_uv/ptc_statistics" -#define UV_BAU_BASENAME "sgi_uv/bau_tunables" -#define UV_BAU_TUNABLES_DIR "sgi_uv" -#define UV_BAU_TUNABLES_FILE "bau_tunables" -#define WHITESPACE " \t\n" -#define cpubit_isset(cpu, bau_local_cpumask) \ - test_bit((cpu), (bau_local_cpumask).bits) - -/* [19:16] SOFT_ACK timeout period 19: 1 is urgency 7 17:16 1 is multiplier */ -/* - * UV2: Bit 19 selects between - * (0): 10 microsecond timebase and - * (1): 80 microseconds - * we're using 560us, similar to UV1: 65 units of 10us - */ -#define UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD (9UL) -#define UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD (15UL) - -#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD (is_uv1_hub() ? \ - UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD : \ - UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD) -/* assuming UV3 is the same */ - -#define BAU_MISC_CONTROL_MULT_MASK 3 - -#define UVH_AGING_PRESCALE_SEL 0x000000b000UL -/* [30:28] URGENCY_7 an index into a table of times */ -#define BAU_URGENCY_7_SHIFT 28 -#define BAU_URGENCY_7_MASK 7 - -#define UVH_TRANSACTION_TIMEOUT 0x000000b200UL -/* [45:40] BAU - BAU transaction timeout select - a multiplier */ -#define BAU_TRANS_SHIFT 40 -#define BAU_TRANS_MASK 0x3f - -/* - * shorten some awkward names - */ -#define AS_PUSH_SHIFT UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT -#define SOFTACK_MSHIFT UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT -#define SOFTACK_PSHIFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT -#define SOFTACK_TIMEOUT_PERIOD UV_INTD_SOFT_ACK_TIMEOUT_PERIOD -#define PREFETCH_HINT_SHFT UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_SHFT -#define SB_STATUS_SHFT UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT -#define write_gmmr uv_write_global_mmr64 -#define write_lmmr uv_write_local_mmr -#define read_lmmr uv_read_local_mmr -#define read_gmmr uv_read_global_mmr64 - -/* - * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 - */ -#define DS_IDLE 0 -#define DS_ACTIVE 1 -#define DS_DESTINATION_TIMEOUT 2 -#define DS_SOURCE_TIMEOUT 3 -/* - * bits put together from HRP_LB_BAU_SB_ACTIVATION_STATUS_0/1/2 - * values 1 and 3 will not occur - * Decoded meaning ERROR BUSY AUX ERR - * ------------------------------- ---- ----- ------- - * IDLE 0 0 0 - * BUSY (active) 0 1 0 - * SW Ack Timeout (destination) 1 0 0 - * SW Ack INTD rejected (strong NACK) 1 0 1 - * Source Side Time Out Detected 1 1 0 - * Destination Side PUT Failed 1 1 1 - */ -#define UV2H_DESC_IDLE 0 -#define UV2H_DESC_BUSY 2 -#define UV2H_DESC_DEST_TIMEOUT 4 -#define UV2H_DESC_DEST_STRONG_NACK 5 -#define UV2H_DESC_SOURCE_TIMEOUT 6 -#define UV2H_DESC_DEST_PUT_ERR 7 - -/* - * delay for 'plugged' timeout retries, in microseconds - */ -#define PLUGGED_DELAY 10 - -/* - * threshholds at which to use IPI to free resources - */ -/* after this # consecutive 'plugged' timeouts, use IPI to release resources */ -#define PLUGSB4RESET 100 -/* after this many consecutive timeouts, use IPI to release resources */ -#define TIMEOUTSB4RESET 1 -/* at this number uses of IPI to release resources, giveup the request */ 
-#define IPI_RESET_LIMIT 1 -/* after this # consecutive successes, bump up the throttle if it was lowered */ -#define COMPLETE_THRESHOLD 5 -/* after this # of giveups (fall back to kernel IPI's) disable the use of - the BAU for a period of time */ -#define GIVEUP_LIMIT 100 - -#define UV_LB_SUBNODEID 0x10 - -/* these two are the same for UV1 and UV2: */ -#define UV_SA_SHFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT -#define UV_SA_MASK UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK -/* 4 bits of software ack period */ -#define UV2_ACK_MASK 0x7UL -#define UV2_ACK_UNITS_SHFT 3 -#define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT - -/* - * number of entries in the destination side payload queue - */ -#define DEST_Q_SIZE 20 -/* - * number of destination side software ack resources - */ -#define DEST_NUM_RESOURCES 8 -/* - * completion statuses for sending a TLB flush message - */ -#define FLUSH_RETRY_PLUGGED 1 -#define FLUSH_RETRY_TIMEOUT 2 -#define FLUSH_GIVEUP 3 -#define FLUSH_COMPLETE 4 - -/* - * tuning the action when the numalink network is extremely delayed - */ -#define CONGESTED_RESPONSE_US 1000 /* 'long' response time, in - microseconds */ -#define CONGESTED_REPS 10 /* long delays averaged over - this many broadcasts */ -#define DISABLED_PERIOD 10 /* time for the bau to be - disabled, in seconds */ -/* see msg_type: */ -#define MSG_NOOP 0 -#define MSG_REGULAR 1 -#define MSG_RETRY 2 - -#define BAU_DESC_QUALIFIER 0x534749 - -enum uv_bau_version { - UV_BAU_V1 = 1, - UV_BAU_V2, - UV_BAU_V3, - UV_BAU_V4, -}; - -/* - * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) - * If the 'multilevel' flag in the header portion of the descriptor - * has been set to 0, then endpoint multi-unicast mode is selected. - * The distribution specification (32 bytes) is interpreted as a 256-bit - * distribution vector. Adjacent bits correspond to consecutive even numbered - * nodeIDs. The result of adding the index of a given bit to the 15-bit - * 'base_dest_nasid' field of the header corresponds to the - * destination nodeID associated with that specified bit. - */ -struct pnmask { - unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)]; -}; - -/* - * mask of cpu's on a uvhub - * (during initialization we need to check that unsigned long has - * enough bits for max. cpu's per uvhub) - */ -struct bau_local_cpumask { - unsigned long bits; -}; - -/* - * Payload: 16 bytes (128 bits) (bytes 0x20-0x2f of descriptor) - * only 12 bytes (96 bits) of the payload area are usable. - * An additional 3 bytes (bits 27:4) of the header address are carried - * to the next bytes of the destination payload queue. - * And an additional 2 bytes of the header Suppl_A field are also - * carried to the destination payload queue. - * But the first byte of the Suppl_A becomes bits 127:120 (the 16th byte) - * of the destination payload queue, which is written by the hardware - * with the s/w ack resource bit vector. 
- * [ effective message contents (16 bytes (128 bits) maximum), not counting - * the s/w ack bit vector ] - */ - -/** - * struct uv1_2_3_bau_msg_payload - defines payload for INTD transactions - * @address: Signifies a page or all TLB's of the cpu - * @sending_cpu: CPU from which the message originates - * @acknowledge_count: CPUs on the destination Hub that received the interrupt - */ -struct uv1_2_3_bau_msg_payload { - u64 address; - u16 sending_cpu; - u16 acknowledge_count; -}; - -/** - * struct uv4_bau_msg_payload - defines payload for INTD transactions - * @address: Signifies a page or all TLB's of the cpu - * @sending_cpu: CPU from which the message originates - * @acknowledge_count: CPUs on the destination Hub that received the interrupt - * @qualifier: Set by source to verify origin of INTD broadcast - */ -struct uv4_bau_msg_payload { - u64 address; - u16 sending_cpu; - u16 acknowledge_count; - u32 reserved:8; - u32 qualifier:24; -}; - -/* - * UV1 Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor) - * see table 4.2.3.0.1 in broacast_assist spec. - */ -struct uv1_bau_msg_header { - unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ - /* bits 5:0 */ - unsigned int base_dest_nasid:15; /* nasid of the first bit */ - /* bits 20:6 */ /* in uvhub map */ - unsigned int command:8; /* message type */ - /* bits 28:21 */ - /* 0x38: SN3net EndPoint Message */ - unsigned int rsvd_1:3; /* must be zero */ - /* bits 31:29 */ - /* int will align on 32 bits */ - unsigned int rsvd_2:9; /* must be zero */ - /* bits 40:32 */ - /* Suppl_A is 56-41 */ - unsigned int sequence:16; /* message sequence number */ - /* bits 56:41 */ /* becomes bytes 16-17 of msg */ - /* Address field (96:57) is - never used as an address - (these are address bits - 42:3) */ - - unsigned int rsvd_3:1; /* must be zero */ - /* bit 57 */ - /* address bits 27:4 are payload */ - /* these next 24 (58-81) bits become bytes 12-14 of msg */ - /* bits 65:58 land in byte 12 */ - unsigned int replied_to:1; /* sent as 0 by the source to - byte 12 */ - /* bit 58 */ - unsigned int msg_type:3; /* software type of the - message */ - /* bits 61:59 */ - unsigned int canceled:1; /* message canceled, resource - is to be freed*/ - /* bit 62 */ - unsigned int payload_1a:1; /* not currently used */ - /* bit 63 */ - unsigned int payload_1b:2; /* not currently used */ - /* bits 65:64 */ - - /* bits 73:66 land in byte 13 */ - unsigned int payload_1ca:6; /* not currently used */ - /* bits 71:66 */ - unsigned int payload_1c:2; /* not currently used */ - /* bits 73:72 */ - - /* bits 81:74 land in byte 14 */ - unsigned int payload_1d:6; /* not currently used */ - /* bits 79:74 */ - unsigned int payload_1e:2; /* not currently used */ - /* bits 81:80 */ - - unsigned int rsvd_4:7; /* must be zero */ - /* bits 88:82 */ - unsigned int swack_flag:1; /* software acknowledge flag */ - /* bit 89 */ - /* INTD trasactions at - destination are to wait for - software acknowledge */ - unsigned int rsvd_5:6; /* must be zero */ - /* bits 95:90 */ - unsigned int rsvd_6:5; /* must be zero */ - /* bits 100:96 */ - unsigned int int_both:1; /* if 1, interrupt both sockets - on the uvhub */ - /* bit 101*/ - unsigned int fairness:3; /* usually zero */ - /* bits 104:102 */ - unsigned int multilevel:1; /* multi-level multicast - format */ - /* bit 105 */ - /* 0 for TLB: endpoint multi-unicast messages */ - unsigned int chaining:1; /* next descriptor is part of - this activation*/ - /* bit 106 */ - unsigned int rsvd_7:21; /* must be zero */ - /* bits 
127:107 */ -}; - -/* - * UV2 Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor) - * see figure 9-2 of harp_sys.pdf - * assuming UV3 is the same - */ -struct uv2_3_bau_msg_header { - unsigned int base_dest_nasid:15; /* nasid of the first bit */ - /* bits 14:0 */ /* in uvhub map */ - unsigned int dest_subnodeid:5; /* must be 0x10, for the LB */ - /* bits 19:15 */ - unsigned int rsvd_1:1; /* must be zero */ - /* bit 20 */ - /* Address bits 59:21 */ - /* bits 25:2 of address (44:21) are payload */ - /* these next 24 bits become bytes 12-14 of msg */ - /* bits 28:21 land in byte 12 */ - unsigned int replied_to:1; /* sent as 0 by the source to - byte 12 */ - /* bit 21 */ - unsigned int msg_type:3; /* software type of the - message */ - /* bits 24:22 */ - unsigned int canceled:1; /* message canceled, resource - is to be freed*/ - /* bit 25 */ - unsigned int payload_1:3; /* not currently used */ - /* bits 28:26 */ - - /* bits 36:29 land in byte 13 */ - unsigned int payload_2a:3; /* not currently used */ - unsigned int payload_2b:5; /* not currently used */ - /* bits 36:29 */ - - /* bits 44:37 land in byte 14 */ - unsigned int payload_3:8; /* not currently used */ - /* bits 44:37 */ - - unsigned int rsvd_2:7; /* reserved */ - /* bits 51:45 */ - unsigned int swack_flag:1; /* software acknowledge flag */ - /* bit 52 */ - unsigned int rsvd_3a:3; /* must be zero */ - unsigned int rsvd_3b:8; /* must be zero */ - unsigned int rsvd_3c:8; /* must be zero */ - unsigned int rsvd_3d:3; /* must be zero */ - /* bits 74:53 */ - unsigned int fairness:3; /* usually zero */ - /* bits 77:75 */ - - unsigned int sequence:16; /* message sequence number */ - /* bits 93:78 Suppl_A */ - unsigned int chaining:1; /* next descriptor is part of - this activation*/ - /* bit 94 */ - unsigned int multilevel:1; /* multi-level multicast - format */ - /* bit 95 */ - unsigned int rsvd_4:24; /* ordered / source node / - source subnode / aging - must be zero */ - /* bits 119:96 */ - unsigned int command:8; /* message type */ - /* bits 127:120 */ -}; - -/* - * The activation descriptor: - * The format of the message to send, plus all accompanying control - * Should be 64 bytes - */ -struct bau_desc { - struct pnmask distribution; - /* - * message template, consisting of header and payload: - */ - union bau_msg_header { - struct uv1_bau_msg_header uv1_hdr; - struct uv2_3_bau_msg_header uv2_3_hdr; - } header; - - union bau_payload_header { - struct uv1_2_3_bau_msg_payload uv1_2_3; - struct uv4_bau_msg_payload uv4; - } payload; -}; -/* UV1: - * -payload-- ---------header------ - * bytes 0-11 bits 41-56 bits 58-81 - * A B (2) C (3) - * - * A/B/C are moved to: - * A C B - * bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector) - * ------------payload queue----------- - */ -/* UV2: - * -payload-- ---------header------ - * bytes 0-11 bits 70-78 bits 21-44 - * A B (2) C (3) - * - * A/B/C are moved to: - * A C B - * bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector) - * ------------payload queue----------- - */ - -/* - * The payload queue on the destination side is an array of these. 
- * With BAU_MISC_CONTROL set for software acknowledge mode, the messages - * are 32 bytes (2 micropackets) (256 bits) in length, but contain only 17 - * bytes of usable data, including the sw ack vector in byte 15 (bits 127:120) - * (12 bytes come from bau_msg_payload, 3 from payload_1, 2 from - * swack_vec and payload_2) - * "Enabling Software Acknowledgment mode (see Section 4.3.3 Software - * Acknowledge Processing) also selects 32 byte (17 bytes usable) payload - * operation." - */ -struct bau_pq_entry { - unsigned long address; /* signifies a page or all TLB's - of the cpu */ - /* 64 bits, bytes 0-7 */ - unsigned short sending_cpu; /* cpu that sent the message */ - /* 16 bits, bytes 8-9 */ - unsigned short acknowledge_count; /* filled in by destination */ - /* 16 bits, bytes 10-11 */ - /* these next 3 bytes come from bits 58-81 of the message header */ - unsigned short replied_to:1; /* sent as 0 by the source */ - unsigned short msg_type:3; /* software message type */ - unsigned short canceled:1; /* sent as 0 by the source */ - unsigned short unused1:3; /* not currently using */ - /* byte 12 */ - unsigned char unused2a; /* not currently using */ - /* byte 13 */ - unsigned char unused2; /* not currently using */ - /* byte 14 */ - unsigned char swack_vec; /* filled in by the hardware */ - /* byte 15 (bits 127:120) */ - unsigned short sequence; /* message sequence number */ - /* bytes 16-17 */ - unsigned char unused4[2]; /* not currently using bytes 18-19 */ - /* bytes 18-19 */ - int number_of_cpus; /* filled in at destination */ - /* 32 bits, bytes 20-23 (aligned) */ - unsigned char unused5[8]; /* not using */ - /* bytes 24-31 */ -}; - -struct msg_desc { - struct bau_pq_entry *msg; - int msg_slot; - struct bau_pq_entry *queue_first; - struct bau_pq_entry *queue_last; -}; - -struct reset_args { - int sender; -}; - -/* - * This structure is allocated per_cpu for UV TLB shootdown statistics. 
- */ -struct ptc_stats { - /* sender statistics */ - unsigned long s_giveup; /* number of fall backs to - IPI-style flushes */ - unsigned long s_requestor; /* number of shootdown - requests */ - unsigned long s_stimeout; /* source side timeouts */ - unsigned long s_dtimeout; /* destination side timeouts */ - unsigned long s_strongnacks; /* number of strong nack's */ - unsigned long s_time; /* time spent in sending side */ - unsigned long s_retriesok; /* successful retries */ - unsigned long s_ntargcpu; /* total number of cpu's - targeted */ - unsigned long s_ntargself; /* times the sending cpu was - targeted */ - unsigned long s_ntarglocals; /* targets of cpus on the local - blade */ - unsigned long s_ntargremotes; /* targets of cpus on remote - blades */ - unsigned long s_ntarglocaluvhub; /* targets of the local hub */ - unsigned long s_ntargremoteuvhub; /* remotes hubs targeted */ - unsigned long s_ntarguvhub; /* total number of uvhubs - targeted */ - unsigned long s_ntarguvhub16; /* number of times target - hubs >= 16*/ - unsigned long s_ntarguvhub8; /* number of times target - hubs >= 8 */ - unsigned long s_ntarguvhub4; /* number of times target - hubs >= 4 */ - unsigned long s_ntarguvhub2; /* number of times target - hubs >= 2 */ - unsigned long s_ntarguvhub1; /* number of times target - hubs == 1 */ - unsigned long s_resets_plug; /* ipi-style resets from plug - state */ - unsigned long s_resets_timeout; /* ipi-style resets from - timeouts */ - unsigned long s_busy; /* status stayed busy past - s/w timer */ - unsigned long s_throttles; /* waits in throttle */ - unsigned long s_retry_messages; /* retry broadcasts */ - unsigned long s_bau_reenabled; /* for bau enable/disable */ - unsigned long s_bau_disabled; /* for bau enable/disable */ - unsigned long s_uv2_wars; /* uv2 workaround, perm. 
busy */ - unsigned long s_uv2_wars_hw; /* uv2 workaround, hiwater */ - unsigned long s_uv2_war_waits; /* uv2 workaround, long waits */ - unsigned long s_overipilimit; /* over the ipi reset limit */ - unsigned long s_giveuplimit; /* disables, over giveup limit*/ - unsigned long s_enters; /* entries to the driver */ - unsigned long s_ipifordisabled; /* fall back to IPI; disabled */ - unsigned long s_plugged; /* plugged by h/w bug*/ - unsigned long s_congested; /* giveup on long wait */ - /* destination statistics */ - unsigned long d_alltlb; /* times all tlb's on this - cpu were flushed */ - unsigned long d_onetlb; /* times just one tlb on this - cpu was flushed */ - unsigned long d_multmsg; /* interrupts with multiple - messages */ - unsigned long d_nomsg; /* interrupts with no message */ - unsigned long d_time; /* time spent on destination - side */ - unsigned long d_requestee; /* number of messages - processed */ - unsigned long d_retries; /* number of retry messages - processed */ - unsigned long d_canceled; /* number of messages canceled - by retries */ - unsigned long d_nocanceled; /* retries that found nothing - to cancel */ - unsigned long d_resets; /* number of ipi-style requests - processed */ - unsigned long d_rcanceled; /* number of messages canceled - by resets */ -}; - -struct tunables { - int *tunp; - int deflt; -}; - -struct hub_and_pnode { - short uvhub; - short pnode; -}; - -struct socket_desc { - short num_cpus; - short cpu_number[MAX_CPUS_PER_SOCKET]; -}; - -struct uvhub_desc { - unsigned short socket_mask; - short num_cpus; - short uvhub; - short pnode; - struct socket_desc socket[2]; -}; - -/** - * struct bau_control - * @status_mmr: location of status mmr, determined by uvhub_cpu - * @status_index: index of ERR|BUSY bits in status mmr, determined by uvhub_cpu - * - * Per-cpu control struct containing CPU topology information and BAU tuneables. 
- */ -struct bau_control { - struct bau_desc *descriptor_base; - struct bau_pq_entry *queue_first; - struct bau_pq_entry *queue_last; - struct bau_pq_entry *bau_msg_head; - struct bau_control *uvhub_master; - struct bau_control *socket_master; - struct ptc_stats *statp; - cpumask_t *cpumask; - unsigned long timeout_interval; - unsigned long set_bau_on_time; - atomic_t active_descriptor_count; - int plugged_tries; - int timeout_tries; - int ipi_attempts; - int conseccompletes; - u64 status_mmr; - int status_index; - bool nobau; - short baudisabled; - short cpu; - short osnode; - short uvhub_cpu; - short uvhub; - short uvhub_version; - short cpus_in_socket; - short cpus_in_uvhub; - short partition_base_pnode; - short busy; /* all were busy (war) */ - unsigned short message_number; - unsigned short uvhub_quiesce; - short socket_acknowledge_count[DEST_Q_SIZE]; - cycles_t send_message; - cycles_t period_end; - cycles_t period_time; - spinlock_t uvhub_lock; - spinlock_t queue_lock; - spinlock_t disable_lock; - /* tunables */ - int max_concurr; - int max_concurr_const; - int plugged_delay; - int plugsb4reset; - int timeoutsb4reset; - int ipi_reset_limit; - int complete_threshold; - int cong_response_us; - int cong_reps; - cycles_t disabled_period; - int period_giveups; - int giveup_limit; - long period_requests; - struct hub_and_pnode *thp; -}; - -/* Abstracted BAU functions */ -struct bau_operations { - unsigned long (*read_l_sw_ack)(void); - unsigned long (*read_g_sw_ack)(int pnode); - unsigned long (*bau_gpa_to_offset)(unsigned long vaddr); - void (*write_l_sw_ack)(unsigned long mmr); - void (*write_g_sw_ack)(int pnode, unsigned long mmr); - void (*write_payload_first)(int pnode, unsigned long mmr); - void (*write_payload_last)(int pnode, unsigned long mmr); - int (*wait_completion)(struct bau_desc*, - struct bau_control*, long try); -}; - -static inline void write_mmr_data_broadcast(int pnode, unsigned long mmr_image) -{ - write_gmmr(pnode, UVH_BAU_DATA_BROADCAST, mmr_image); -} - -static inline void write_mmr_descriptor_base(int pnode, unsigned long mmr_image) -{ - write_gmmr(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, mmr_image); -} - -static inline void write_mmr_activation(unsigned long index) -{ - write_lmmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); -} - -static inline void write_gmmr_activation(int pnode, unsigned long mmr_image) -{ - write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, mmr_image); -} - -static inline void write_mmr_proc_payload_first(int pnode, unsigned long mmr_image) -{ - write_gmmr(pnode, UV4H_LB_PROC_INTD_QUEUE_FIRST, mmr_image); -} - -static inline void write_mmr_proc_payload_last(int pnode, unsigned long mmr_image) -{ - write_gmmr(pnode, UV4H_LB_PROC_INTD_QUEUE_LAST, mmr_image); -} - -static inline void write_mmr_payload_first(int pnode, unsigned long mmr_image) -{ - write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, mmr_image); -} - -static inline void write_mmr_payload_tail(int pnode, unsigned long mmr_image) -{ - write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, mmr_image); -} - -static inline void write_mmr_payload_last(int pnode, unsigned long mmr_image) -{ - write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, mmr_image); -} - -static inline void write_mmr_misc_control(int pnode, unsigned long mmr_image) -{ - write_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); -} - -static inline unsigned long read_mmr_misc_control(int pnode) -{ - return read_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL); -} - -static inline void write_mmr_sw_ack(unsigned long mr) -{ - 
uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr); -} - -static inline void write_gmmr_sw_ack(int pnode, unsigned long mr) -{ - write_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr); -} - -static inline unsigned long read_mmr_sw_ack(void) -{ - return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); -} - -static inline unsigned long read_gmmr_sw_ack(int pnode) -{ - return read_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); -} - -static inline void write_mmr_proc_sw_ack(unsigned long mr) -{ - uv_write_local_mmr(UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR, mr); -} - -static inline void write_gmmr_proc_sw_ack(int pnode, unsigned long mr) -{ - write_gmmr(pnode, UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR, mr); -} - -static inline unsigned long read_mmr_proc_sw_ack(void) -{ - return read_lmmr(UV4H_LB_PROC_INTD_SOFT_ACK_PENDING); -} - -static inline unsigned long read_gmmr_proc_sw_ack(int pnode) -{ - return read_gmmr(pnode, UV4H_LB_PROC_INTD_SOFT_ACK_PENDING); -} - -static inline void write_mmr_data_config(int pnode, unsigned long mr) -{ - uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, mr); -} - -static inline int bau_uvhub_isset(int uvhub, struct pnmask *dstp) -{ - return constant_test_bit(uvhub, &dstp->bits[0]); -} -static inline void bau_uvhub_set(int pnode, struct pnmask *dstp) -{ - __set_bit(pnode, &dstp->bits[0]); -} -static inline void bau_uvhubs_clear(struct pnmask *dstp, - int nbits) -{ - bitmap_zero(&dstp->bits[0], nbits); -} -static inline int bau_uvhub_weight(struct pnmask *dstp) -{ - return bitmap_weight((unsigned long *)&dstp->bits[0], - UV_DISTRIBUTION_SIZE); -} - -static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) -{ - bitmap_zero(&dstp->bits, nbits); -} - -extern void uv_bau_message_intr1(void); -#ifdef CONFIG_TRACING -#define trace_uv_bau_message_intr1 uv_bau_message_intr1 -#endif -extern void uv_bau_timeout_intr1(void); - -struct atomic_short { - short counter; -}; - -/* - * atomic_read_short - read a short atomic variable - * @v: pointer of type atomic_short - * - * Atomically reads the value of @v. - */ -static inline int atomic_read_short(const struct atomic_short *v) -{ - return v->counter; -} - -/* - * atom_asr - add and return a short int - * @i: short value to add - * @v: pointer of type atomic_short - * - * Atomically adds @i to @v and returns @i + @v - */ -static inline int atom_asr(short i, struct atomic_short *v) -{ - short __i = i; - asm volatile(LOCK_PREFIX "xaddw %0, %1" - : "+r" (i), "+m" (v->counter) - : : "memory"); - return i + __i; -} - -/* - * conditionally add 1 to *v, unless *v is >= u - * return 0 if we cannot add 1 to *v because it is >= u - * return 1 if we can add 1 to *v because it is < u - * the add is atomic - * - * This is close to atomic_add_unless(), but this allows the 'u' value - * to be lowered below the current 'v'. atomic_add_unless can only stop - * on equal. - */ -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) -{ - spin_lock(lock); - if (atomic_read(v) >= u) { - spin_unlock(lock); - return 0; - } - atomic_inc(v); - spin_unlock(lock); - return 1; -} - -#endif /* _ASM_X86_UV_UV_BAU_H */ diff --git a/arch/x86/include/asm/uv/uv_geo.h b/arch/x86/include/asm/uv/uv_geo.h new file mode 100644 index 000000000000..027a9258dbca --- /dev/null +++ b/arch/x86/include/asm/uv/uv_geo.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * + * This file is subject to the terms and conditions of the GNU General Public + * License. 
See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Hewlett Packard Enterprise Development LP. All rights reserved. + */ + +#ifndef _ASM_UV_GEO_H +#define _ASM_UV_GEO_H + +/* Type declarations */ + +/* Size of a geoid_s structure (must be before decl. of geoid_u) */ +#define GEOID_SIZE 8 + +/* Fields common to all substructures */ +struct geo_common_s { + unsigned char type; /* What type of h/w is named by this geoid_s */ + unsigned char blade; + unsigned char slot; /* slot is IRU */ + unsigned char upos; + unsigned char rack; +}; + +/* Additional fields for particular types of hardware */ +struct geo_node_s { + struct geo_common_s common; /* No additional fields needed */ +}; + +struct geo_rtr_s { + struct geo_common_s common; /* No additional fields needed */ +}; + +struct geo_iocntl_s { + struct geo_common_s common; /* No additional fields needed */ +}; + +struct geo_pcicard_s { + struct geo_iocntl_s common; + char bus; /* Bus/widget number */ + char slot; /* PCI slot number */ +}; + +/* Subcomponents of a node */ +struct geo_cpu_s { + struct geo_node_s node; + unsigned char socket:4, /* Which CPU on the node */ + thread:4; + unsigned char core; +}; + +struct geo_mem_s { + struct geo_node_s node; + char membus; /* The memory bus on the node */ + char memslot; /* The memory slot on the bus */ +}; + +union geoid_u { + struct geo_common_s common; + struct geo_node_s node; + struct geo_iocntl_s iocntl; + struct geo_pcicard_s pcicard; + struct geo_rtr_s rtr; + struct geo_cpu_s cpu; + struct geo_mem_s mem; + char padsize[GEOID_SIZE]; +}; + +/* Defined constants */ + +#define GEO_MAX_LEN 48 + +#define GEO_TYPE_INVALID 0 +#define GEO_TYPE_MODULE 1 +#define GEO_TYPE_NODE 2 +#define GEO_TYPE_RTR 3 +#define GEO_TYPE_IOCNTL 4 +#define GEO_TYPE_IOCARD 5 +#define GEO_TYPE_CPU 6 +#define GEO_TYPE_MEM 7 +#define GEO_TYPE_MAX (GEO_TYPE_MEM+1) + +static inline int geo_rack(union geoid_u g) +{ + return (g.common.type == GEO_TYPE_INVALID) ? + -1 : g.common.rack; +} + +static inline int geo_slot(union geoid_u g) +{ + return (g.common.type == GEO_TYPE_INVALID) ? + -1 : g.common.upos; +} + +static inline int geo_blade(union geoid_u g) +{ + return (g.common.type == GEO_TYPE_INVALID) ? + -1 : g.common.blade * 2 + g.common.slot; +} + +#endif /* _ASM_UV_GEO_H */ diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 44cf6d6deb7a..ea877fd83114 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -5,6 +5,7 @@ * * SGI UV architectural definitions * + * (C) Copyright 2020 Hewlett Packard Enterprise Development LP * Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved. */ @@ -19,6 +20,7 @@ #include <linux/topology.h> #include <asm/types.h> #include <asm/percpu.h> +#include <asm/uv/uv.h> #include <asm/uv/uv_mmrs.h> #include <asm/uv/bios.h> #include <asm/irq_vectors.h> @@ -128,17 +130,6 @@ */ #define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_BLADES * 2) -/* System Controller Interface Reg info */ -struct uv_scir_s { - struct timer_list timer; - unsigned long offset; - unsigned long last; - unsigned long idle_on; - unsigned long idle_off; - unsigned char state; - unsigned char enabled; -}; - /* GAM (globally addressed memory) range table */ struct uv_gam_range_s { u32 limit; /* PA bits 56:26 (GAM_RANGE_SHFT) */ @@ -154,6 +145,8 @@ struct uv_gam_range_s { * available in the L3 cache on the cpu socket for the node. 
*/ struct uv_hub_info_s { + unsigned int hub_type; + unsigned char hub_revision; unsigned long global_mmr_base; unsigned long global_mmr_shift; unsigned long gpa_mask; @@ -166,9 +159,9 @@ struct uv_hub_info_s { unsigned char m_val; unsigned char n_val; unsigned char gr_table_len; - unsigned char hub_revision; unsigned char apic_pnode_shift; unsigned char gpa_shift; + unsigned char nasid_shift; unsigned char m_shift; unsigned char n_lshift; unsigned int gnode_extra; @@ -184,22 +177,20 @@ struct uv_hub_info_s { unsigned short nr_possible_cpus; unsigned short nr_online_cpus; short memory_nid; + unsigned short *node_to_socket; }; /* CPU specific info with a pointer to the hub common info struct */ struct uv_cpu_info_s { void *p_uv_hub_info; unsigned char blade_cpu_id; - struct uv_scir_s scir; + void *reserved; }; DECLARE_PER_CPU(struct uv_cpu_info_s, __uv_cpu_info); #define uv_cpu_info this_cpu_ptr(&__uv_cpu_info) #define uv_cpu_info_per(cpu) (&per_cpu(__uv_cpu_info, cpu)) -#define uv_scir_info (&uv_cpu_info->scir) -#define uv_cpu_scir_info(cpu) (&uv_cpu_info_per(cpu)->scir) - /* Node specific hub common info struct */ extern void **__uv_hub_info_list; static inline struct uv_hub_info_s *uv_hub_info_list(int node) @@ -218,109 +209,51 @@ static inline struct uv_hub_info_s *uv_cpu_hub_info(int cpu) return (struct uv_hub_info_s *)uv_cpu_info_per(cpu)->p_uv_hub_info; } -#define UV_HUB_INFO_VERSION 0x7150 -extern int uv_hub_info_version(void); -static inline int uv_hub_info_check(int version) +static inline int uv_hub_type(void) { - if (uv_hub_info_version() == version) - return 0; - - pr_crit("UV: uv_hub_info version(%x) mismatch, expecting(%x)\n", - uv_hub_info_version(), version); + return uv_hub_info->hub_type; +} - BUG(); /* Catastrophic - cannot continue on unknown UV system */ +static inline __init void uv_hub_type_set(int uvmask) +{ + uv_hub_info->hub_type = uvmask; } -#define _uv_hub_info_check() uv_hub_info_check(UV_HUB_INFO_VERSION) + /* * HUB revision ranges for each UV HUB architecture. * This is a software convention - NOT the hardware revision numbers in * the hub chip. 
 */
-#define UV1_HUB_REVISION_BASE	1
 #define UV2_HUB_REVISION_BASE	3
 #define UV3_HUB_REVISION_BASE	5
 #define UV4_HUB_REVISION_BASE	7
 #define UV4A_HUB_REVISION_BASE	8	/* UV4 (fixed) rev 2 */
+#define UV5_HUB_REVISION_BASE	9

-#ifdef UV1_HUB_IS_SUPPORTED
-static inline int is_uv1_hub(void)
-{
-	return uv_hub_info->hub_revision < UV2_HUB_REVISION_BASE;
-}
-#else
-static inline int is_uv1_hub(void)
-{
-	return 0;
-}
-#endif
+static inline int is_uv(int uvmask) { return uv_hub_type() & uvmask; }
+static inline int is_uv1_hub(void) { return 0; }
+static inline int is_uv2_hub(void) { return is_uv(UV2); }
+static inline int is_uv3_hub(void) { return is_uv(UV3); }
+static inline int is_uv4a_hub(void) { return is_uv(UV4A); }
+static inline int is_uv4_hub(void) { return is_uv(UV4); }
+static inline int is_uv5_hub(void) { return is_uv(UV5); }

-#ifdef UV2_HUB_IS_SUPPORTED
-static inline int is_uv2_hub(void)
-{
-	return ((uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) &&
-		(uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE));
-}
-#else
-static inline int is_uv2_hub(void)
-{
-	return 0;
-}
-#endif
-
-#ifdef UV3_HUB_IS_SUPPORTED
-static inline int is_uv3_hub(void)
-{
-	return ((uv_hub_info->hub_revision >= UV3_HUB_REVISION_BASE) &&
-		(uv_hub_info->hub_revision < UV4_HUB_REVISION_BASE));
-}
-#else
-static inline int is_uv3_hub(void)
-{
-	return 0;
-}
-#endif
-
-/* First test "is UV4A", then "is UV4" */
-#ifdef UV4A_HUB_IS_SUPPORTED
-static inline int is_uv4a_hub(void)
-{
-	return (uv_hub_info->hub_revision >= UV4A_HUB_REVISION_BASE);
-}
-#else
-static inline int is_uv4a_hub(void)
-{
-	return 0;
-}
-#endif
+/*
+ * UV4A is a revision of UV4. So on UV4A, both is_uv4_hub() and
+ * is_uv4a_hub() return true, while on UV4, only is_uv4_hub()
+ * returns true. So to get correct results, test for UV4A first,
+ * and only then for UV4.
+ */

-#ifdef UV4_HUB_IS_SUPPORTED
-static inline int is_uv4_hub(void)
-{
-	return uv_hub_info->hub_revision >= UV4_HUB_REVISION_BASE;
-}
-#else
-static inline int is_uv4_hub(void)
-{
-	return 0;
-}
-#endif
+/* UVX class: UV2,3,4 */
+static inline int is_uvx_hub(void) { return is_uv(UVX); }

-static inline int is_uvx_hub(void)
-{
-	if (uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE)
-		return uv_hub_info->hub_revision;
-	return 0;
-}
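As a quick illustration of the test-ordering rule in the comment above (a standalone sketch, not part of the patch; the bit values mirror the UV4/UV4A masks defined in uv_mmrs.h below, everything else is invented):

	#include <stdio.h>

	#define UV4	(1 << 2)
	#define UV4A	(1 << 3)

	static int hub_type;	/* set once at boot, cf. uv_hub_type_set() */

	static int is_uv(int uvmask) { return hub_type & uvmask; }

	int main(void)
	{
		hub_type = UV4 | UV4A;	/* a UV4A hub answers to both masks */

		/* Wrong order: the UV4 test matches first, UV4A is never seen. */
		printf("wrong:   %s\n", is_uv(UV4) ? "UV4" : is_uv(UV4A) ? "UV4A" : "?");
		/* Correct order: test the more specific UV4A mask first. */
		printf("correct: %s\n", is_uv(UV4A) ? "UV4A" : is_uv(UV4) ? "UV4" : "?");
		return 0;
	}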
+/* UVY class: UV5,..? */
+static inline int is_uvy_hub(void) { return is_uv(UVY); }
-
-static inline int is_uv_hub(void)
-{
-#ifdef UV1_HUB_IS_SUPPORTED
-	return uv_hub_info->hub_revision;
-#endif
-	return is_uvx_hub();
-}
+/* Any UV Hubbed System */
+static inline int is_uv_hub(void) { return is_uv(UV_ANY); }

 union uvh_apicid {
	unsigned long	v;
@@ -342,14 +275,11 @@ union uvh_apicid {
 * g - GNODE (full 15-bit global nasid, right shifted 1)
 * p - PNODE (local part of nsids, right shifted 1)
 */
-#define UV_NASID_TO_PNODE(n)	(((n) >> 1) & uv_hub_info->pnode_mask)
+#define UV_NASID_TO_PNODE(n) \
+	(((n) >> uv_hub_info->nasid_shift) & uv_hub_info->pnode_mask)
 #define UV_PNODE_TO_GNODE(p)	((p) |uv_hub_info->gnode_extra)
-#define UV_PNODE_TO_NASID(p)	(UV_PNODE_TO_GNODE(p) << 1)
-
-#define UV1_LOCAL_MMR_BASE	0xf4000000UL
-#define UV1_GLOBAL_MMR32_BASE	0xf8000000UL
-#define UV1_LOCAL_MMR_SIZE	(64UL * 1024 * 1024)
-#define UV1_GLOBAL_MMR32_SIZE	(64UL * 1024 * 1024)
+#define UV_PNODE_TO_NASID(p) \
+	(UV_PNODE_TO_GNODE(p) << uv_hub_info->nasid_shift)

 #define UV2_LOCAL_MMR_BASE	0xfa000000UL
 #define UV2_GLOBAL_MMR32_BASE	0xfc000000UL
@@ -362,33 +292,42 @@ union uvh_apicid {
 #define UV3_GLOBAL_MMR32_SIZE	(32UL * 1024 * 1024)

 #define UV4_LOCAL_MMR_BASE	0xfa000000UL
-#define UV4_GLOBAL_MMR32_BASE	0xfc000000UL
+#define UV4_GLOBAL_MMR32_BASE	0
 #define UV4_LOCAL_MMR_SIZE	(32UL * 1024 * 1024)
-#define UV4_GLOBAL_MMR32_SIZE	(16UL * 1024 * 1024)
+#define UV4_GLOBAL_MMR32_SIZE	0
+
+#define UV5_LOCAL_MMR_BASE	0xfa000000UL
+#define UV5_GLOBAL_MMR32_BASE	0
+#define UV5_LOCAL_MMR_SIZE	(32UL * 1024 * 1024)
+#define UV5_GLOBAL_MMR32_SIZE	0

 #define UV_LOCAL_MMR_BASE	( \
-	is_uv1_hub() ? UV1_LOCAL_MMR_BASE : \
-	is_uv2_hub() ? UV2_LOCAL_MMR_BASE : \
-	is_uv3_hub() ? UV3_LOCAL_MMR_BASE : \
-	/*is_uv4_hub*/ UV4_LOCAL_MMR_BASE)
+	is_uv(UV2) ? UV2_LOCAL_MMR_BASE : \
+	is_uv(UV3) ? UV3_LOCAL_MMR_BASE : \
+	is_uv(UV4) ? UV4_LOCAL_MMR_BASE : \
+	is_uv(UV5) ? UV5_LOCAL_MMR_BASE : \
+	0)

 #define UV_GLOBAL_MMR32_BASE	( \
-	is_uv1_hub() ? UV1_GLOBAL_MMR32_BASE : \
-	is_uv2_hub() ? UV2_GLOBAL_MMR32_BASE : \
-	is_uv3_hub() ? UV3_GLOBAL_MMR32_BASE : \
-	/*is_uv4_hub*/ UV4_GLOBAL_MMR32_BASE)
+	is_uv(UV2) ? UV2_GLOBAL_MMR32_BASE : \
+	is_uv(UV3) ? UV3_GLOBAL_MMR32_BASE : \
+	is_uv(UV4) ? UV4_GLOBAL_MMR32_BASE : \
+	is_uv(UV5) ? UV5_GLOBAL_MMR32_BASE : \
+	0)

 #define UV_LOCAL_MMR_SIZE	( \
-	is_uv1_hub() ? UV1_LOCAL_MMR_SIZE : \
-	is_uv2_hub() ? UV2_LOCAL_MMR_SIZE : \
-	is_uv3_hub() ? UV3_LOCAL_MMR_SIZE : \
-	/*is_uv4_hub*/ UV4_LOCAL_MMR_SIZE)
+	is_uv(UV2) ? UV2_LOCAL_MMR_SIZE : \
+	is_uv(UV3) ? UV3_LOCAL_MMR_SIZE : \
+	is_uv(UV4) ? UV4_LOCAL_MMR_SIZE : \
+	is_uv(UV5) ? UV5_LOCAL_MMR_SIZE : \
+	0)

 #define UV_GLOBAL_MMR32_SIZE	( \
-	is_uv1_hub() ? UV1_GLOBAL_MMR32_SIZE : \
-	is_uv2_hub() ? UV2_GLOBAL_MMR32_SIZE : \
-	is_uv3_hub() ? UV3_GLOBAL_MMR32_SIZE : \
-	/*is_uv4_hub*/ UV4_GLOBAL_MMR32_SIZE)
+	is_uv(UV2) ? UV2_GLOBAL_MMR32_SIZE : \
+	is_uv(UV3) ? UV3_GLOBAL_MMR32_SIZE : \
+	is_uv(UV4) ? UV4_GLOBAL_MMR32_SIZE : \
+	is_uv(UV5) ? UV5_GLOBAL_MMR32_SIZE : \
+	0)

 #define UV_GLOBAL_MMR64_BASE	(uv_hub_info->global_mmr_base)
@@ -406,8 +345,6 @@ union uvh_apicid {
 #define UVH_APICID		0x002D0E00L
 #define UV_APIC_PNODE_SHIFT	6

-#define UV_APICID_HIBIT_MASK	0xffff0000
-
 /* Local Bus from cpu's perspective */
 #define LOCAL_BUS_BASE		0x1c00000
 #define LOCAL_BUS_SIZE		(4 * 1024 * 1024)
@@ -417,7 +354,7 @@ union uvh_apicid {
 *
 * Note there are NO leds on a UV system. This register is only
 * used by the system controller to monitor system-wide operation.
- * There are 64 regs per node. With Nahelem cpus (2 cores per node,
+ * There are 64 regs per node. With Nehalem cpus (2 cores per node,
 * 8 cpus per core, 2 threads per cpu) there are 32 cpu threads on
 * a node.
 *
@@ -583,25 +520,30 @@ static inline int uv_socket_to_node(int socket)
	return _uv_socket_to_node(socket, uv_hub_info->socket_to_node);
 }

+static inline int uv_pnode_to_socket(int pnode)
+{
+	unsigned short *p2s = uv_hub_info->pnode_to_socket;
+
+	return p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode;
+}
+
 /* pnode, offset --> socket virtual */
 static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
 {
	unsigned int m_val = uv_hub_info->m_val;
	unsigned long base;
-	unsigned short sockid, node, *p2s;
+	unsigned short sockid;

	if (m_val)
		return __va(((unsigned long)pnode << m_val) | offset);

-	p2s = uv_hub_info->pnode_to_socket;
-	sockid = p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode;
-	node = uv_socket_to_node(sockid);
+	sockid = uv_pnode_to_socket(pnode);

	/* limit address of previous socket is our base, except node 0 is 0 */
-	if (!node)
+	if (sockid == 0)
		return __va((unsigned long)offset);

-	base = (unsigned long)(uv_hub_info->gr_table[node - 1].limit);
+	base = (unsigned long)(uv_hub_info->gr_table[sockid - 1].limit);
	return __va(base << UV_GAM_RANGE_SHFT | offset);
 }

@@ -614,15 +556,6 @@ static inline int uv_apicid_to_pnode(int apicid)
	return s2pn ? s2pn[pnode - uv_hub_info->min_socket] : pnode;
 }

-/* Convert an apicid to the socket number on the blade */
-static inline int uv_apicid_to_socket(int apicid)
-{
-	if (is_uv1_hub())
-		return (apicid >> (uv_hub_info->apic_pnode_shift - 1)) & 1;
-	else
-		return 0;
-}
-
 /*
 * Access global MMRs using the low memory MMR32 space. This region supports
 * faster MMR access but not all MMRs are accessible in this space.
@@ -713,15 +646,14 @@ static inline int uv_cpu_blade_processor_id(int cpu)
 {
	return uv_cpu_info_per(cpu)->blade_cpu_id;
 }
-#define _uv_cpu_blade_processor_id 1	/* indicate function available */

-/* Blade number to Node number (UV1..UV4 is 1:1) */
+/* Blade number to Node number (UV2..UV4 is 1:1) */
 static inline int uv_blade_to_node(int blade)
 {
-	return blade;
+	return uv_socket_to_node(blade);
 }

-/* Blade number of current cpu. Numnbered 0 .. <#blades -1> */
+/* Blade number of current cpu. Numbered 0 .. <#blades -1> */
 static inline int uv_numa_blade_id(void)
 {
	return uv_hub_info->numa_blade_id;
@@ -729,24 +661,28 @@ static inline int uv_numa_blade_id(void)

 /*
 * Convert linux node number to the UV blade number.
- * .. Currently for UV1 thru UV4 the node and the blade are identical.
- * .. If this changes then you MUST check references to this function!
+ * .. Currently for UV2 thru UV4 the node and the blade are identical.
+ * .. UV5 needs conversion when sub-numa clustering is enabled.
 */
 static inline int uv_node_to_blade_id(int nid)
 {
-	return nid;
+	unsigned short *n2s = uv_hub_info->node_to_socket;
+
+	return n2s ? n2s[nid] : nid;
 }

-/* Convert a cpu number to the the UV blade number */
+/* Convert a CPU number to the UV blade number */
 static inline int uv_cpu_to_blade_id(int cpu)
 {
-	return uv_node_to_blade_id(cpu_to_node(cpu));
+	return uv_cpu_hub_info(cpu)->numa_blade_id;
 }

 /* Convert a blade id to the PNODE of the blade */
 static inline int uv_blade_to_pnode(int bid)
 {
-	return uv_hub_info_list(uv_blade_to_node(bid))->pnode;
+	unsigned short *s2p = uv_hub_info->socket_to_pnode;
+
+	return s2p ? s2p[bid] : bid;
 }
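In the reworked uv_pnode_offset_to_vaddr() above, a socket's base address is the GAM range limit of the previous socket (socket 0 starts at 0), with limits held in 2^26-byte units per the gr_table comment earlier in this header. A standalone sketch of that lookup (the table values here are invented for illustration):

	#include <stdio.h>

	#define UV_GAM_RANGE_SHFT 26	/* gr_table limits are PA bits 56:26 */

	/* Base of socket N is where socket N-1's range ends; socket 0 is 0. */
	static unsigned long socket_base(const unsigned int *limit, int sockid)
	{
		if (sockid == 0)
			return 0;
		return (unsigned long)limit[sockid - 1] << UV_GAM_RANGE_SHFT;
	}

	int main(void)
	{
		unsigned int gr_limit[3] = { 4, 8, 12 };	/* invented limits */
		int s;

		for (s = 0; s < 3; s++)
			printf("socket %d base = %#lx\n", s, socket_base(gr_limit, s));
		return 0;
	}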
 /* Nid of memory node on blade. -1 if no blade-local memory */
@@ -801,7 +737,7 @@ extern void uv_nmi_setup_hubless(void);
 #define UVH_TSC_SYNC_SHIFT_UV2K	16	/* UV2/3k have different bits */
 #define UVH_TSC_SYNC_MASK	3	/* 0011 */
 #define UVH_TSC_SYNC_VALID	3	/* 0011 */
-#define UVH_TSC_SYNC_INVALID	2	/* 0010 */
+#define UVH_TSC_SYNC_UNKNOWN	0	/* 0000 */

 /* BMC sets a bit this MMR non-zero before sending an NMI */
 #define UVH_NMI_MMR		UVH_BIOS_KERNEL_MMR
@@ -809,19 +745,6 @@ extern void uv_nmi_setup_hubless(void);
 #define UVH_NMI_MMR_SHIFT	63
 #define UVH_NMI_MMR_TYPE	"SCRATCH5"

-/* Newer SMM NMI handler, not present in all systems */
-#define UVH_NMI_MMRX		UVH_EVENT_OCCURRED0
-#define UVH_NMI_MMRX_CLEAR	UVH_EVENT_OCCURRED0_ALIAS
-#define UVH_NMI_MMRX_SHIFT	UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT
-#define UVH_NMI_MMRX_TYPE	"EXTIO_INT0"
-
-/* Non-zero indicates newer SMM NMI handler present */
-#define UVH_NMI_MMRX_SUPPORTED	UVH_EXTIO_INT0_BROADCAST
-
-/* Indicates to BIOS that we want to use the newer SMM NMI handler */
-#define UVH_NMI_MMRX_REQ	UVH_BIOS_KERNEL_MMR_ALIAS_2
-#define UVH_NMI_MMRX_REQ_SHIFT	62
-
 struct uv_hub_nmi_s {
	raw_spinlock_t	nmi_lock;
	atomic_t	in_nmi;		/* flag this node in UV NMI IRQ */
@@ -853,51 +776,6 @@ DECLARE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi);
 #define UV_NMI_STATE_DUMP		2
 #define UV_NMI_STATE_DUMP_DONE		3

-/* Update SCIR state */
-static inline void uv_set_scir_bits(unsigned char value)
-{
-	if (uv_scir_info->state != value) {
-		uv_scir_info->state = value;
-		uv_write_local_mmr8(uv_scir_info->offset, value);
-	}
-}
-
-static inline unsigned long uv_scir_offset(int apicid)
-{
-	return SCIR_LOCAL_MMR_BASE | (apicid & 0x3f);
-}
-
-static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
-{
-	if (uv_cpu_scir_info(cpu)->state != value) {
-		uv_write_global_mmr8(uv_cpu_to_pnode(cpu),
-				uv_cpu_scir_info(cpu)->offset, value);
-		uv_cpu_scir_info(cpu)->state = value;
-	}
-}
-
-extern unsigned int uv_apicid_hibits;
-static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode)
-{
-	apicid |= uv_apicid_hibits;
-	return (1UL << UVH_IPI_INT_SEND_SHFT) |
-			((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
-			(mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
-			(vector << UVH_IPI_INT_VECTOR_SHFT);
-}
-
-static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
-{
-	unsigned long val;
-	unsigned long dmode = dest_Fixed;
-
-	if (vector == NMI_VECTOR)
-		dmode = dest_NMI;
-
-	val = uv_hub_ipi_value(apicid, vector, dmode);
-	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
-}
-
 /*
 * Get the minimum revision number of the hub chips within the partition.
 * (See UVx_HUB_REVISION_BASE above for specific values.)
diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h
index d6b17c760622..1876b5edd142 100644
--- a/arch/x86/include/asm/uv/uv_irq.h
+++ b/arch/x86/include/asm/uv/uv_irq.h
@@ -31,7 +31,6 @@ enum {
	UV_AFFINITY_CPU
 };

-extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
 extern int uv_setup_irq(char *, int, int, unsigned long, int);
 extern void uv_teardown_irq(unsigned int);

diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 62c79e26a59a..bb45812889dd 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -3,8 +3,9 @@
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
- * SGI UV MMR definitions
+ * HPE UV MMR definitions
 *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
 * Copyright (C) 2007-2016 Silicon Graphics, Inc. All rights reserved.
*/ @@ -18,58 +19,43 @@ * grouped by architecture types. * * UVH - definitions common to all UV hub types. - * UVXH - definitions common to all UV eXtended hub types (currently 2, 3, 4). - * UV1H - definitions specific to UV type 1 hub. - * UV2H - definitions specific to UV type 2 hub. - * UV3H - definitions specific to UV type 3 hub. + * UVXH - definitions common to UVX class (2, 3, 4). + * UVYH - definitions common to UVY class (5). + * UV5H - definitions specific to UV type 5 hub. + * UV4AH - definitions specific to UV type 4A hub. * UV4H - definitions specific to UV type 4 hub. - * - * So in general, MMR addresses and structures are identical on all hubs types. - * These MMRs are identified as: - * #define UVH_xxx <address> - * union uvh_xxx { - * unsigned long v; - * struct uvh_int_cmpd_s { - * } s; - * }; + * UV3H - definitions specific to UV type 3 hub. + * UV2H - definitions specific to UV type 2 hub. * * If the MMR exists on all hub types but have different addresses, - * use a conditional operator to define the value at runtime. - * #define UV1Hxxx a - * #define UV2Hxxx b - * #define UV3Hxxx c - * #define UV4Hxxx d - * #define UV4AHxxx e - * #define UVHxxx (is_uv1_hub() ? UV1Hxxx : - * (is_uv2_hub() ? UV2Hxxx : - * (is_uv3_hub() ? UV3Hxxx : - * (is_uv4a_hub() ? UV4AHxxx : - * UV4Hxxx)) + * use a conditional operator to define the value at runtime. Any + * that are not defined are blank. + * (UV4A variations only generated if different from uv4) + * #define UVHxxx ( + * is_uv(UV5) ? UV5Hxxx value : + * is_uv(UV4A) ? UV4AHxxx value : + * is_uv(UV4) ? UV4Hxxx value : + * is_uv(UV3) ? UV3Hxxx value : + * is_uv(UV2) ? UV2Hxxx value : + * <ucv> or <undef value>) * - * If the MMR exists on all hub types > 1 but have different addresses, the - * variation using "UVX" as the prefix exists. - * #define UV2Hxxx b - * #define UV3Hxxx c - * #define UV4Hxxx d - * #define UV4AHxxx e - * #define UVHxxx (is_uv2_hub() ? UV2Hxxx : - * (is_uv3_hub() ? UV3Hxxx : - * (is_uv4a_hub() ? UV4AHxxx : - * UV4Hxxx)) + * Class UVX has UVs (2|3|4|4A). + * Class UVY has UVs (5). * * union uvh_xxx { * unsigned long v; * struct uvh_xxx_s { # Common fields only * } s; - * struct uv1h_xxx_s { # Full UV1 definition (*) - * } s1; - * struct uv2h_xxx_s { # Full UV2 definition (*) - * } s2; - * struct uv3h_xxx_s { # Full UV3 definition (*) - * } s3; - * (NOTE: No struct uv4ah_xxx_s members exist) + * struct uv5h_xxx_s { # Full UV5 definition (*) + * } s5; + * struct uv4ah_xxx_s { # Full UV4A definition (*) + * } s4a; * struct uv4h_xxx_s { # Full UV4 definition (*) * } s4; + * struct uv3h_xxx_s { # Full UV3 definition (*) + * } s3; + * struct uv2h_xxx_s { # Full UV2 definition (*) + * } s2; * }; * (* - if present and different than the common struct) * @@ -78,18 +64,22 @@ * if the contents is the same for all hubs, only the "s" structure is * generated. 
* - * If the MMR exists on ONLY 1 type of hub, no generic definition is - * generated: - * #define UVnH_xxx <uvn address> - * union uvnh_xxx { - * unsigned long v; - * struct uvh_int_cmpd_s { - * } sn; - * }; - * - * (GEN Flags: mflags_opt= undefs=function UV234=UVXH) + * (GEN Flags: undefs=function) */ + /* UV bit masks */ +#define UV2 (1 << 0) +#define UV3 (1 << 1) +#define UV4 (1 << 2) +#define UV4A (1 << 3) +#define UV5 (1 << 4) +#define UVX (UV2|UV3|UV4) +#define UVY (UV5) +#define UV_ANY (~0) + + + + #define UV_MMR_ENABLE (1UL << 63) #define UV1_HUB_PART_NUMBER 0x88a5 @@ -98,533 +88,475 @@ #define UV3_HUB_PART_NUMBER 0x9578 #define UV3_HUB_PART_NUMBER_X 0x4321 #define UV4_HUB_PART_NUMBER 0x99a1 - -/* Compat: Indicate which UV Hubs are supported. */ -#define UV1_HUB_IS_SUPPORTED 1 -#define UV2_HUB_IS_SUPPORTED 1 -#define UV3_HUB_IS_SUPPORTED 1 -#define UV4_HUB_IS_SUPPORTED 1 -#define UV4A_HUB_IS_SUPPORTED 1 +#define UV5_HUB_PART_NUMBER 0xa171 /* Error function to catch undefined references */ extern unsigned long uv_undefined(char *str); /* ========================================================================= */ -/* UVH_BAU_DATA_BROADCAST */ -/* ========================================================================= */ -#define UVH_BAU_DATA_BROADCAST 0x61688UL - -#define UV1H_BAU_DATA_BROADCAST_32 0x440 -#define UV2H_BAU_DATA_BROADCAST_32 0x440 -#define UV3H_BAU_DATA_BROADCAST_32 0x440 -#define UV4H_BAU_DATA_BROADCAST_32 0x360 -#define UVH_BAU_DATA_BROADCAST_32 ( \ - is_uv1_hub() ? UV1H_BAU_DATA_BROADCAST_32 : \ - is_uv2_hub() ? UV2H_BAU_DATA_BROADCAST_32 : \ - is_uv3_hub() ? UV3H_BAU_DATA_BROADCAST_32 : \ - /*is_uv4_hub*/ UV4H_BAU_DATA_BROADCAST_32) - -#define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0 -#define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL - - -union uvh_bau_data_broadcast_u { - unsigned long v; - struct uvh_bau_data_broadcast_s { - unsigned long enable:1; /* RW */ - unsigned long rsvd_1_63:63; - } s; -}; - -/* ========================================================================= */ -/* UVH_BAU_DATA_CONFIG */ -/* ========================================================================= */ -#define UVH_BAU_DATA_CONFIG 0x61680UL - -#define UV1H_BAU_DATA_CONFIG_32 0x438 -#define UV2H_BAU_DATA_CONFIG_32 0x438 -#define UV3H_BAU_DATA_CONFIG_32 0x438 -#define UV4H_BAU_DATA_CONFIG_32 0x358 -#define UVH_BAU_DATA_CONFIG_32 ( \ - is_uv1_hub() ? UV1H_BAU_DATA_CONFIG_32 : \ - is_uv2_hub() ? UV2H_BAU_DATA_CONFIG_32 : \ - is_uv3_hub() ? 
UV3H_BAU_DATA_CONFIG_32 : \ - /*is_uv4_hub*/ UV4H_BAU_DATA_CONFIG_32) - -#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0 -#define UVH_BAU_DATA_CONFIG_DM_SHFT 8 -#define UVH_BAU_DATA_CONFIG_DESTMODE_SHFT 11 -#define UVH_BAU_DATA_CONFIG_STATUS_SHFT 12 -#define UVH_BAU_DATA_CONFIG_P_SHFT 13 -#define UVH_BAU_DATA_CONFIG_T_SHFT 15 -#define UVH_BAU_DATA_CONFIG_M_SHFT 16 -#define UVH_BAU_DATA_CONFIG_APIC_ID_SHFT 32 -#define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVH_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL -#define UVH_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVH_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVH_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL -#define UVH_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL -#define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL -#define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - - -union uvh_bau_data_config_u { - unsigned long v; - struct uvh_bau_data_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s; -}; - -/* ========================================================================= */ /* UVH_EVENT_OCCURRED0 */ /* ========================================================================= */ #define UVH_EVENT_OCCURRED0 0x70000UL -#define UVH_EVENT_OCCURRED0_32 0x5e8 +/* UVH common defines*/ #define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0 -#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 #define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL -#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL - -#define UV1H_EVENT_OCCURRED0_GR0_HCERR_SHFT 1 -#define UV1H_EVENT_OCCURRED0_GR1_HCERR_SHFT 2 -#define UV1H_EVENT_OCCURRED0_LH_HCERR_SHFT 3 -#define UV1H_EVENT_OCCURRED0_RH_HCERR_SHFT 4 -#define UV1H_EVENT_OCCURRED0_XN_HCERR_SHFT 5 -#define UV1H_EVENT_OCCURRED0_SI_HCERR_SHFT 6 -#define UV1H_EVENT_OCCURRED0_LB_AOERR0_SHFT 7 -#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8 -#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9 -#define UV1H_EVENT_OCCURRED0_LH_AOERR0_SHFT 10 -#define UV1H_EVENT_OCCURRED0_XN_AOERR0_SHFT 12 -#define UV1H_EVENT_OCCURRED0_SI_AOERR0_SHFT 13 -#define UV1H_EVENT_OCCURRED0_LB_AOERR1_SHFT 14 -#define UV1H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15 -#define UV1H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16 -#define UV1H_EVENT_OCCURRED0_LH_AOERR1_SHFT 17 -#define UV1H_EVENT_OCCURRED0_RH_AOERR1_SHFT 18 -#define UV1H_EVENT_OCCURRED0_XN_AOERR1_SHFT 19 -#define UV1H_EVENT_OCCURRED0_SI_AOERR1_SHFT 20 -#define UV1H_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21 -#define UV1H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36 
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38 -#define UV1H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39 -#define UV1H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40 -#define UV1H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41 -#define UV1H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42 -#define UV1H_EVENT_OCCURRED0_LTC_INT_SHFT 43 -#define UV1H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44 -#define UV1H_EVENT_OCCURRED0_IPI_INT_SHFT 45 -#define UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46 -#define UV1H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47 -#define UV1H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48 -#define UV1H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49 -#define UV1H_EVENT_OCCURRED0_PROFILE_INT_SHFT 50 -#define UV1H_EVENT_OCCURRED0_RTC0_SHFT 51 -#define UV1H_EVENT_OCCURRED0_RTC1_SHFT 52 -#define UV1H_EVENT_OCCURRED0_RTC2_SHFT 53 -#define UV1H_EVENT_OCCURRED0_RTC3_SHFT 54 -#define UV1H_EVENT_OCCURRED0_BAU_DATA_SHFT 55 -#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56 -#define UV1H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL -#define UV1H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL -#define UV1H_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL -#define UV1H_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL -#define UV1H_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL -#define UV1H_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL -#define UV1H_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL -#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL -#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL -#define UV1H_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL -#define UV1H_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL -#define UV1H_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL -#define UV1H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL -#define UV1H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL -#define UV1H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL -#define UV1H_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL -#define UV1H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL -#define UV1H_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL -#define UV1H_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL -#define UV1H_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL -#define UV1H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL -#define UV1H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL -#define 
UV1H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL -#define UV1H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL -#define UV1H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL -#define UV1H_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL -#define UV1H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL -#define UV1H_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL -#define UV1H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL -#define UV1H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL -#define UV1H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL -#define UV1H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL -#define UV1H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL -#define UV1H_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL -#define UV1H_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL -#define UV1H_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL -#define UV1H_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL -#define UV1H_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL -#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL +/* UVXH common defines */ #define UVXH_EVENT_OCCURRED0_RH_HCERR_SHFT 2 -#define UVXH_EVENT_OCCURRED0_LH0_HCERR_SHFT 3 -#define UVXH_EVENT_OCCURRED0_LH1_HCERR_SHFT 4 -#define UVXH_EVENT_OCCURRED0_GR0_HCERR_SHFT 5 -#define UVXH_EVENT_OCCURRED0_GR1_HCERR_SHFT 6 -#define UVXH_EVENT_OCCURRED0_NI0_HCERR_SHFT 7 -#define UVXH_EVENT_OCCURRED0_NI1_HCERR_SHFT 8 -#define UVXH_EVENT_OCCURRED0_LB_AOERR0_SHFT 9 -#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12 -#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13 -#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14 -#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15 -#define UVXH_EVENT_OCCURRED0_XB_AOERR0_SHFT 16 #define UVXH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL +#define UVXH_EVENT_OCCURRED0_LH0_HCERR_SHFT 3 #define UVXH_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL +#define UVXH_EVENT_OCCURRED0_LH1_HCERR_SHFT 4 #define UVXH_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL +#define UVXH_EVENT_OCCURRED0_GR0_HCERR_SHFT 5 #define UVXH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL +#define UVXH_EVENT_OCCURRED0_GR1_HCERR_SHFT 6 #define UVXH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL +#define UVXH_EVENT_OCCURRED0_NI0_HCERR_SHFT 7 #define UVXH_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL +#define UVXH_EVENT_OCCURRED0_NI1_HCERR_SHFT 8 #define UVXH_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL +#define UVXH_EVENT_OCCURRED0_LB_AOERR0_SHFT 9 #define UVXH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL +#define UVXH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 +#define UVXH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL +#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12 #define UVXH_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL +#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13 #define UVXH_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL +#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14 #define UVXH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL +#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15 #define UVXH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL +#define UVXH_EVENT_OCCURRED0_XB_AOERR0_SHFT 16 #define UVXH_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL -#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 -#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 -#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 -#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 -#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 -#define 
UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 -#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 -#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 -#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 -#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 -#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 -#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 -#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 -#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 -#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 -#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 -#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 -#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 -#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 -#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 -#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 -#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 -#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53 -#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 -#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 -#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 -#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 -#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 -#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL -#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL -#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL -#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL -#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL -#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL -#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL -#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL -#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL -#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL -#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL -#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL -#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL -#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL -#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL -#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL -#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 
0x0000004000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL -#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL -#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL -#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL -#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL -#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL -#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL -#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL -#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL -#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL -#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL -#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL - -#define UV3H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 -#define UV3H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 -#define UV3H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 -#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 -#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 -#define UV3H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 -#define UV3H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 -#define UV3H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 -#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 -#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 -#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 -#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 -#define UV3H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 -#define UV3H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 -#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 -#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 -#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 -#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 -#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 -#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 -#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 -#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 -#define UV3H_EVENT_OCCURRED0_IPI_INT_SHFT 53 -#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 -#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 -#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 -#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 -#define 
UV3H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 -#define UV3H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL -#define UV3H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL -#define UV3H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL -#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL -#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL -#define UV3H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL -#define UV3H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL -#define UV3H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL -#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL -#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL -#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL -#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL -#define UV3H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL -#define UV3H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL -#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL -#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL -#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL -#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL -#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL -#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL -#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL -#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL -#define UV3H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL -#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL -#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL -#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL -#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL -#define UV3H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL - +/* UVYH common defines */ +#define UVYH_EVENT_OCCURRED0_KT_HCERR_SHFT 1 +#define UVYH_EVENT_OCCURRED0_KT_HCERR_MASK 0x0000000000000002UL +#define UVYH_EVENT_OCCURRED0_RH0_HCERR_SHFT 2 +#define UVYH_EVENT_OCCURRED0_RH0_HCERR_MASK 0x0000000000000004UL +#define UVYH_EVENT_OCCURRED0_RH1_HCERR_SHFT 3 +#define UVYH_EVENT_OCCURRED0_RH1_HCERR_MASK 0x0000000000000008UL +#define UVYH_EVENT_OCCURRED0_LH0_HCERR_SHFT 4 +#define UVYH_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000010UL +#define UVYH_EVENT_OCCURRED0_LH1_HCERR_SHFT 5 
+#define UVYH_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000020UL +#define UVYH_EVENT_OCCURRED0_LH2_HCERR_SHFT 6 +#define UVYH_EVENT_OCCURRED0_LH2_HCERR_MASK 0x0000000000000040UL +#define UVYH_EVENT_OCCURRED0_LH3_HCERR_SHFT 7 +#define UVYH_EVENT_OCCURRED0_LH3_HCERR_MASK 0x0000000000000080UL +#define UVYH_EVENT_OCCURRED0_XB_HCERR_SHFT 8 +#define UVYH_EVENT_OCCURRED0_XB_HCERR_MASK 0x0000000000000100UL +#define UVYH_EVENT_OCCURRED0_RDM_HCERR_SHFT 9 +#define UVYH_EVENT_OCCURRED0_RDM_HCERR_MASK 0x0000000000000200UL +#define UVYH_EVENT_OCCURRED0_NI0_HCERR_SHFT 10 +#define UVYH_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000400UL +#define UVYH_EVENT_OCCURRED0_NI1_HCERR_SHFT 11 +#define UVYH_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000800UL +#define UVYH_EVENT_OCCURRED0_LB_AOERR0_SHFT 12 +#define UVYH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000001000UL +#define UVYH_EVENT_OCCURRED0_KT_AOERR0_SHFT 13 +#define UVYH_EVENT_OCCURRED0_KT_AOERR0_MASK 0x0000000000002000UL +#define UVYH_EVENT_OCCURRED0_RH0_AOERR0_SHFT 14 +#define UVYH_EVENT_OCCURRED0_RH0_AOERR0_MASK 0x0000000000004000UL +#define UVYH_EVENT_OCCURRED0_RH1_AOERR0_SHFT 15 +#define UVYH_EVENT_OCCURRED0_RH1_AOERR0_MASK 0x0000000000008000UL +#define UVYH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 16 +#define UVYH_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000010000UL +#define UVYH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 17 +#define UVYH_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000020000UL +#define UVYH_EVENT_OCCURRED0_LH2_AOERR0_SHFT 18 +#define UVYH_EVENT_OCCURRED0_LH2_AOERR0_MASK 0x0000000000040000UL +#define UVYH_EVENT_OCCURRED0_LH3_AOERR0_SHFT 19 +#define UVYH_EVENT_OCCURRED0_LH3_AOERR0_MASK 0x0000000000080000UL +#define UVYH_EVENT_OCCURRED0_XB_AOERR0_SHFT 20 +#define UVYH_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000100000UL +#define UVYH_EVENT_OCCURRED0_RDM_AOERR0_SHFT 21 +#define UVYH_EVENT_OCCURRED0_RDM_AOERR0_MASK 0x0000000000200000UL +#define UVYH_EVENT_OCCURRED0_RT0_AOERR0_SHFT 22 +#define UVYH_EVENT_OCCURRED0_RT0_AOERR0_MASK 0x0000000000400000UL +#define UVYH_EVENT_OCCURRED0_RT1_AOERR0_SHFT 23 +#define UVYH_EVENT_OCCURRED0_RT1_AOERR0_MASK 0x0000000000800000UL +#define UVYH_EVENT_OCCURRED0_NI0_AOERR0_SHFT 24 +#define UVYH_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000001000000UL +#define UVYH_EVENT_OCCURRED0_NI1_AOERR0_SHFT 25 +#define UVYH_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000002000000UL +#define UVYH_EVENT_OCCURRED0_LB_AOERR1_SHFT 26 +#define UVYH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000004000000UL +#define UVYH_EVENT_OCCURRED0_KT_AOERR1_SHFT 27 +#define UVYH_EVENT_OCCURRED0_KT_AOERR1_MASK 0x0000000008000000UL +#define UVYH_EVENT_OCCURRED0_RH0_AOERR1_SHFT 28 +#define UVYH_EVENT_OCCURRED0_RH0_AOERR1_MASK 0x0000000010000000UL +#define UVYH_EVENT_OCCURRED0_RH1_AOERR1_SHFT 29 +#define UVYH_EVENT_OCCURRED0_RH1_AOERR1_MASK 0x0000000020000000UL +#define UVYH_EVENT_OCCURRED0_LH0_AOERR1_SHFT 30 +#define UVYH_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000040000000UL +#define UVYH_EVENT_OCCURRED0_LH1_AOERR1_SHFT 31 +#define UVYH_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000080000000UL +#define UVYH_EVENT_OCCURRED0_LH2_AOERR1_SHFT 32 +#define UVYH_EVENT_OCCURRED0_LH2_AOERR1_MASK 0x0000000100000000UL +#define UVYH_EVENT_OCCURRED0_LH3_AOERR1_SHFT 33 +#define UVYH_EVENT_OCCURRED0_LH3_AOERR1_MASK 0x0000000200000000UL +#define UVYH_EVENT_OCCURRED0_XB_AOERR1_SHFT 34 +#define UVYH_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000400000000UL +#define UVYH_EVENT_OCCURRED0_RDM_AOERR1_SHFT 35 +#define UVYH_EVENT_OCCURRED0_RDM_AOERR1_MASK 0x0000000800000000UL +#define 
UVYH_EVENT_OCCURRED0_RT0_AOERR1_SHFT 36 +#define UVYH_EVENT_OCCURRED0_RT0_AOERR1_MASK 0x0000001000000000UL +#define UVYH_EVENT_OCCURRED0_RT1_AOERR1_SHFT 37 +#define UVYH_EVENT_OCCURRED0_RT1_AOERR1_MASK 0x0000002000000000UL +#define UVYH_EVENT_OCCURRED0_NI0_AOERR1_SHFT 38 +#define UVYH_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000004000000000UL +#define UVYH_EVENT_OCCURRED0_NI1_AOERR1_SHFT 39 +#define UVYH_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000008000000000UL +#define UVYH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 40 +#define UVYH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000010000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 41 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000020000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 42 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000040000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 43 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000080000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 44 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000100000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 45 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000200000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 46 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000400000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 47 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000800000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 48 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0001000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 49 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0002000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 50 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0004000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 51 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0008000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 52 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0010000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 53 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0020000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 54 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0040000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 55 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0080000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 56 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0100000000000000UL +#define UVYH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 57 +#define UVYH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0200000000000000UL +#define UVYH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 58 +#define UVYH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0400000000000000UL +#define UVYH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 59 +#define UVYH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0800000000000000UL +#define UVYH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 60 +#define UVYH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x1000000000000000UL +#define UVYH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 61 +#define UVYH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x2000000000000000UL + +/* UV4 unique defines */ #define UV4H_EVENT_OCCURRED0_KT_HCERR_SHFT 1 -#define UV4H_EVENT_OCCURRED0_KT_AOERR0_SHFT 10 -#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_SHFT 17 -#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_SHFT 18 -#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_SHFT 19 -#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_SHFT 20 -#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 21 -#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 22 -#define 
UV4H_EVENT_OCCURRED0_LB_AOERR1_SHFT 23 -#define UV4H_EVENT_OCCURRED0_KT_AOERR1_SHFT 24 -#define UV4H_EVENT_OCCURRED0_RH_AOERR1_SHFT 25 -#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 26 -#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 27 -#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 28 -#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 29 -#define UV4H_EVENT_OCCURRED0_XB_AOERR1_SHFT 30 -#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_SHFT 31 -#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_SHFT 32 -#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_SHFT 33 -#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_SHFT 34 -#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 35 -#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 36 -#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 37 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 38 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 39 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 40 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 41 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 42 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 43 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 44 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 45 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 46 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 47 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 48 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 49 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 50 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 51 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 52 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 53 -#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 54 -#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 55 -#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 56 -#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 57 -#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 58 -#define UV4H_EVENT_OCCURRED0_IPI_INT_SHFT 59 -#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 60 -#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 61 -#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 62 -#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 63 #define UV4H_EVENT_OCCURRED0_KT_HCERR_MASK 0x0000000000000002UL +#define UV4H_EVENT_OCCURRED0_KT_AOERR0_SHFT 10 #define UV4H_EVENT_OCCURRED0_KT_AOERR0_MASK 0x0000000000000400UL +#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_SHFT 17 #define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_MASK 0x0000000000020000UL +#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_SHFT 18 #define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_MASK 0x0000000000040000UL +#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_SHFT 19 #define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_MASK 0x0000000000080000UL +#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_SHFT 20 #define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_MASK 0x0000000000100000UL +#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 21 #define UV4H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000200000UL +#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 22 #define UV4H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000400000UL +#define UV4H_EVENT_OCCURRED0_LB_AOERR1_SHFT 23 #define UV4H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000800000UL +#define UV4H_EVENT_OCCURRED0_KT_AOERR1_SHFT 24 #define UV4H_EVENT_OCCURRED0_KT_AOERR1_MASK 0x0000000001000000UL +#define UV4H_EVENT_OCCURRED0_RH_AOERR1_SHFT 25 #define UV4H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000002000000UL +#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 26 #define UV4H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000004000000UL +#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 27 #define UV4H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000008000000UL +#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 28 #define 
UV4H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000010000000UL +#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 29 #define UV4H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000020000000UL +#define UV4H_EVENT_OCCURRED0_XB_AOERR1_SHFT 30 #define UV4H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000040000000UL +#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_SHFT 31 #define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_MASK 0x0000000080000000UL +#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_SHFT 32 #define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_MASK 0x0000000100000000UL +#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_SHFT 33 #define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_MASK 0x0000000200000000UL +#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_SHFT 34 #define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_MASK 0x0000000400000000UL +#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 35 #define UV4H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000800000000UL +#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 36 #define UV4H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000001000000000UL +#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 37 #define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000002000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 38 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000004000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 39 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000008000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 40 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000010000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 41 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000020000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 42 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000040000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 43 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000080000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 44 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000100000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 45 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000200000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 46 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000400000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 47 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000800000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 48 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0001000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 49 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0002000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 50 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0004000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 51 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0008000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 52 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0010000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 53 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0020000000000000UL +#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 54 #define UV4H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0040000000000000UL +#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 55 #define UV4H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0080000000000000UL +#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 56 #define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0100000000000000UL +#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 57 #define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0200000000000000UL +#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 58 #define 
UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0400000000000000UL +#define UV4H_EVENT_OCCURRED0_IPI_INT_SHFT 59 #define UV4H_EVENT_OCCURRED0_IPI_INT_MASK 0x0800000000000000UL +#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 60 #define UV4H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x1000000000000000UL +#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 61 #define UV4H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x2000000000000000UL +#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 62 #define UV4H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x4000000000000000UL +#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 63 #define UV4H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x8000000000000000UL +/* UV3 unique defines */ +#define UV3H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 +#define UV3H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL +#define UV3H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 +#define UV3H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL +#define UV3H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 +#define UV3H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL +#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 +#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL +#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 +#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL +#define UV3H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 +#define UV3H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL +#define UV3H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 +#define UV3H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL +#define UV3H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 +#define UV3H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL +#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 +#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL +#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 +#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL +#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 +#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL +#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 +#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL +#define UV3H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 +#define UV3H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL +#define UV3H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 +#define UV3H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL +#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 +#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL +#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 +#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL +#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 +#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 +#define 
UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL +#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 +#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL +#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 +#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL +#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 +#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL +#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 +#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL +#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 +#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL +#define UV3H_EVENT_OCCURRED0_IPI_INT_SHFT 53 +#define UV3H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL +#define UV3H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 +#define UV3H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL + +/* UV2 unique defines */ +#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 +#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL +#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 +#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL +#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 +#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL +#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 +#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL +#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 +#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL +#define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 +#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL +#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 +#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL +#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 +#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL +#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 +#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL +#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 +#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL +#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 +#define 
UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL +#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 +#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL +#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 +#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL +#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 +#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL +#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 +#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL +#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 +#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL +#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 +#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL +#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 +#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL +#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 +#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL +#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 +#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL +#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 +#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL +#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 +#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL +#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53 +#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL +#define 
UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL +#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 +#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL + +#define UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK ( \ + is_uv(UV4) ? 0x1000000000000000UL : \ + is_uv(UV3) ? 0x0040000000000000UL : \ + is_uv(UV2) ? 0x0040000000000000UL : \ + 0) #define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT ( \ - is_uv1_hub() ? UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT : \ - is_uv2_hub() ? UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT : \ - is_uv3_hub() ? UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT : \ - /*is_uv4_hub*/ UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT) + is_uv(UV4) ? 60 : \ + is_uv(UV3) ? 54 : \ + is_uv(UV2) ? 54 : \ + -1) union uvh_event_occurred0_u { unsigned long v; + + /* UVH common struct */ struct uvh_event_occurred0_s { - unsigned long lb_hcerr:1; /* RW, W1C */ - unsigned long rsvd_1_10:10; - unsigned long rh_aoerr0:1; /* RW, W1C */ - unsigned long rsvd_12_63:52; + unsigned long lb_hcerr:1; /* RW */ + unsigned long rsvd_1_63:63; } s; + + /* UVXH common struct */ struct uvxh_event_occurred0_s { unsigned long lb_hcerr:1; /* RW */ unsigned long rsvd_1:1; @@ -645,6 +577,142 @@ union uvh_event_occurred0_u { unsigned long xb_aoerr0:1; /* RW */ unsigned long rsvd_17_63:47; } sx; + + /* UVYH common struct */ + struct uvyh_event_occurred0_s { + unsigned long lb_hcerr:1; /* RW */ + unsigned long kt_hcerr:1; /* RW */ + unsigned long rh0_hcerr:1; /* RW */ + unsigned long rh1_hcerr:1; /* RW */ + unsigned long lh0_hcerr:1; /* RW */ + unsigned long lh1_hcerr:1; /* RW */ + unsigned long lh2_hcerr:1; /* RW */ + unsigned long lh3_hcerr:1; /* RW */ + unsigned long xb_hcerr:1; /* RW */ + unsigned long rdm_hcerr:1; /* RW */ + unsigned long ni0_hcerr:1; /* RW */ + unsigned long ni1_hcerr:1; /* RW */ + unsigned long lb_aoerr0:1; /* RW */ + unsigned long kt_aoerr0:1; /* RW */ + unsigned long rh0_aoerr0:1; /* RW */ + unsigned long rh1_aoerr0:1; /* RW */ + unsigned long lh0_aoerr0:1; /* RW */ + unsigned long lh1_aoerr0:1; /* RW */ + unsigned long lh2_aoerr0:1; /* RW */ + unsigned long lh3_aoerr0:1; /* RW */ + unsigned long xb_aoerr0:1; /* RW */ + unsigned long rdm_aoerr0:1; /* RW */ + unsigned long rt0_aoerr0:1; /* RW */ + unsigned long rt1_aoerr0:1; /* RW */ + unsigned long ni0_aoerr0:1; /* RW */ + unsigned long ni1_aoerr0:1; /* RW */ + unsigned long lb_aoerr1:1; /* RW */ + unsigned long kt_aoerr1:1; /* RW */ + unsigned long rh0_aoerr1:1; /* RW */ + unsigned long rh1_aoerr1:1; /* RW */ + unsigned long lh0_aoerr1:1; /* RW */ + unsigned long lh1_aoerr1:1; /* RW */ + unsigned long lh2_aoerr1:1; /* RW */ + unsigned long lh3_aoerr1:1; /* RW */ + unsigned long xb_aoerr1:1; /* RW */ + unsigned long rdm_aoerr1:1; /* RW */ + unsigned long rt0_aoerr1:1; /* RW */ + unsigned long rt1_aoerr1:1; /* RW */ + unsigned long ni0_aoerr1:1; /* RW */ + unsigned long ni1_aoerr1:1; /* RW */ + unsigned long system_shutdown_int:1; /* RW */ + unsigned long lb_irq_int_0:1; /* RW */ + unsigned long lb_irq_int_1:1; /* RW */ + unsigned long lb_irq_int_2:1; /* RW */ + unsigned long lb_irq_int_3:1; /* RW */ + unsigned long lb_irq_int_4:1; /* RW */ + unsigned long lb_irq_int_5:1; /* RW */ + unsigned long lb_irq_int_6:1; /* RW */ + unsigned long lb_irq_int_7:1; /* RW */ + unsigned long 
lb_irq_int_8:1; /* RW */ + unsigned long lb_irq_int_9:1; /* RW */ + unsigned long lb_irq_int_10:1; /* RW */ + unsigned long lb_irq_int_11:1; /* RW */ + unsigned long lb_irq_int_12:1; /* RW */ + unsigned long lb_irq_int_13:1; /* RW */ + unsigned long lb_irq_int_14:1; /* RW */ + unsigned long lb_irq_int_15:1; /* RW */ + unsigned long l1_nmi_int:1; /* RW */ + unsigned long stop_clock:1; /* RW */ + unsigned long asic_to_l1:1; /* RW */ + unsigned long l1_to_asic:1; /* RW */ + unsigned long la_seq_trigger:1; /* RW */ + unsigned long rsvd_62_63:2; + } sy; + + /* UV5 unique struct */ + struct uv5h_event_occurred0_s { + unsigned long lb_hcerr:1; /* RW */ + unsigned long kt_hcerr:1; /* RW */ + unsigned long rh0_hcerr:1; /* RW */ + unsigned long rh1_hcerr:1; /* RW */ + unsigned long lh0_hcerr:1; /* RW */ + unsigned long lh1_hcerr:1; /* RW */ + unsigned long lh2_hcerr:1; /* RW */ + unsigned long lh3_hcerr:1; /* RW */ + unsigned long xb_hcerr:1; /* RW */ + unsigned long rdm_hcerr:1; /* RW */ + unsigned long ni0_hcerr:1; /* RW */ + unsigned long ni1_hcerr:1; /* RW */ + unsigned long lb_aoerr0:1; /* RW */ + unsigned long kt_aoerr0:1; /* RW */ + unsigned long rh0_aoerr0:1; /* RW */ + unsigned long rh1_aoerr0:1; /* RW */ + unsigned long lh0_aoerr0:1; /* RW */ + unsigned long lh1_aoerr0:1; /* RW */ + unsigned long lh2_aoerr0:1; /* RW */ + unsigned long lh3_aoerr0:1; /* RW */ + unsigned long xb_aoerr0:1; /* RW */ + unsigned long rdm_aoerr0:1; /* RW */ + unsigned long rt0_aoerr0:1; /* RW */ + unsigned long rt1_aoerr0:1; /* RW */ + unsigned long ni0_aoerr0:1; /* RW */ + unsigned long ni1_aoerr0:1; /* RW */ + unsigned long lb_aoerr1:1; /* RW */ + unsigned long kt_aoerr1:1; /* RW */ + unsigned long rh0_aoerr1:1; /* RW */ + unsigned long rh1_aoerr1:1; /* RW */ + unsigned long lh0_aoerr1:1; /* RW */ + unsigned long lh1_aoerr1:1; /* RW */ + unsigned long lh2_aoerr1:1; /* RW */ + unsigned long lh3_aoerr1:1; /* RW */ + unsigned long xb_aoerr1:1; /* RW */ + unsigned long rdm_aoerr1:1; /* RW */ + unsigned long rt0_aoerr1:1; /* RW */ + unsigned long rt1_aoerr1:1; /* RW */ + unsigned long ni0_aoerr1:1; /* RW */ + unsigned long ni1_aoerr1:1; /* RW */ + unsigned long system_shutdown_int:1; /* RW */ + unsigned long lb_irq_int_0:1; /* RW */ + unsigned long lb_irq_int_1:1; /* RW */ + unsigned long lb_irq_int_2:1; /* RW */ + unsigned long lb_irq_int_3:1; /* RW */ + unsigned long lb_irq_int_4:1; /* RW */ + unsigned long lb_irq_int_5:1; /* RW */ + unsigned long lb_irq_int_6:1; /* RW */ + unsigned long lb_irq_int_7:1; /* RW */ + unsigned long lb_irq_int_8:1; /* RW */ + unsigned long lb_irq_int_9:1; /* RW */ + unsigned long lb_irq_int_10:1; /* RW */ + unsigned long lb_irq_int_11:1; /* RW */ + unsigned long lb_irq_int_12:1; /* RW */ + unsigned long lb_irq_int_13:1; /* RW */ + unsigned long lb_irq_int_14:1; /* RW */ + unsigned long lb_irq_int_15:1; /* RW */ + unsigned long l1_nmi_int:1; /* RW */ + unsigned long stop_clock:1; /* RW */ + unsigned long asic_to_l1:1; /* RW */ + unsigned long l1_to_asic:1; /* RW */ + unsigned long la_seq_trigger:1; /* RW */ + unsigned long rsvd_62_63:2; + } s5; + + /* UV4 unique struct */ struct uv4h_event_occurred0_s { unsigned long lb_hcerr:1; /* RW */ unsigned long kt_hcerr:1; /* RW */ @@ -711,13 +779,1355 @@ union uvh_event_occurred0_u { unsigned long extio_int2:1; /* RW */ unsigned long extio_int3:1; /* RW */ } s4; + + /* UV3 unique struct */ + struct uv3h_event_occurred0_s { + unsigned long lb_hcerr:1; /* RW */ + unsigned long qp_hcerr:1; /* RW */ + unsigned long rh_hcerr:1; /* RW */ + unsigned 
long lh0_hcerr:1; /* RW */ + unsigned long lh1_hcerr:1; /* RW */ + unsigned long gr0_hcerr:1; /* RW */ + unsigned long gr1_hcerr:1; /* RW */ + unsigned long ni0_hcerr:1; /* RW */ + unsigned long ni1_hcerr:1; /* RW */ + unsigned long lb_aoerr0:1; /* RW */ + unsigned long qp_aoerr0:1; /* RW */ + unsigned long rh_aoerr0:1; /* RW */ + unsigned long lh0_aoerr0:1; /* RW */ + unsigned long lh1_aoerr0:1; /* RW */ + unsigned long gr0_aoerr0:1; /* RW */ + unsigned long gr1_aoerr0:1; /* RW */ + unsigned long xb_aoerr0:1; /* RW */ + unsigned long rt_aoerr0:1; /* RW */ + unsigned long ni0_aoerr0:1; /* RW */ + unsigned long ni1_aoerr0:1; /* RW */ + unsigned long lb_aoerr1:1; /* RW */ + unsigned long qp_aoerr1:1; /* RW */ + unsigned long rh_aoerr1:1; /* RW */ + unsigned long lh0_aoerr1:1; /* RW */ + unsigned long lh1_aoerr1:1; /* RW */ + unsigned long gr0_aoerr1:1; /* RW */ + unsigned long gr1_aoerr1:1; /* RW */ + unsigned long xb_aoerr1:1; /* RW */ + unsigned long rt_aoerr1:1; /* RW */ + unsigned long ni0_aoerr1:1; /* RW */ + unsigned long ni1_aoerr1:1; /* RW */ + unsigned long system_shutdown_int:1; /* RW */ + unsigned long lb_irq_int_0:1; /* RW */ + unsigned long lb_irq_int_1:1; /* RW */ + unsigned long lb_irq_int_2:1; /* RW */ + unsigned long lb_irq_int_3:1; /* RW */ + unsigned long lb_irq_int_4:1; /* RW */ + unsigned long lb_irq_int_5:1; /* RW */ + unsigned long lb_irq_int_6:1; /* RW */ + unsigned long lb_irq_int_7:1; /* RW */ + unsigned long lb_irq_int_8:1; /* RW */ + unsigned long lb_irq_int_9:1; /* RW */ + unsigned long lb_irq_int_10:1; /* RW */ + unsigned long lb_irq_int_11:1; /* RW */ + unsigned long lb_irq_int_12:1; /* RW */ + unsigned long lb_irq_int_13:1; /* RW */ + unsigned long lb_irq_int_14:1; /* RW */ + unsigned long lb_irq_int_15:1; /* RW */ + unsigned long l1_nmi_int:1; /* RW */ + unsigned long stop_clock:1; /* RW */ + unsigned long asic_to_l1:1; /* RW */ + unsigned long l1_to_asic:1; /* RW */ + unsigned long la_seq_trigger:1; /* RW */ + unsigned long ipi_int:1; /* RW */ + unsigned long extio_int0:1; /* RW */ + unsigned long extio_int1:1; /* RW */ + unsigned long extio_int2:1; /* RW */ + unsigned long extio_int3:1; /* RW */ + unsigned long profile_int:1; /* RW */ + unsigned long rsvd_59_63:5; + } s3; + + /* UV2 unique struct */ + struct uv2h_event_occurred0_s { + unsigned long lb_hcerr:1; /* RW */ + unsigned long qp_hcerr:1; /* RW */ + unsigned long rh_hcerr:1; /* RW */ + unsigned long lh0_hcerr:1; /* RW */ + unsigned long lh1_hcerr:1; /* RW */ + unsigned long gr0_hcerr:1; /* RW */ + unsigned long gr1_hcerr:1; /* RW */ + unsigned long ni0_hcerr:1; /* RW */ + unsigned long ni1_hcerr:1; /* RW */ + unsigned long lb_aoerr0:1; /* RW */ + unsigned long qp_aoerr0:1; /* RW */ + unsigned long rh_aoerr0:1; /* RW */ + unsigned long lh0_aoerr0:1; /* RW */ + unsigned long lh1_aoerr0:1; /* RW */ + unsigned long gr0_aoerr0:1; /* RW */ + unsigned long gr1_aoerr0:1; /* RW */ + unsigned long xb_aoerr0:1; /* RW */ + unsigned long rt_aoerr0:1; /* RW */ + unsigned long ni0_aoerr0:1; /* RW */ + unsigned long ni1_aoerr0:1; /* RW */ + unsigned long lb_aoerr1:1; /* RW */ + unsigned long qp_aoerr1:1; /* RW */ + unsigned long rh_aoerr1:1; /* RW */ + unsigned long lh0_aoerr1:1; /* RW */ + unsigned long lh1_aoerr1:1; /* RW */ + unsigned long gr0_aoerr1:1; /* RW */ + unsigned long gr1_aoerr1:1; /* RW */ + unsigned long xb_aoerr1:1; /* RW */ + unsigned long rt_aoerr1:1; /* RW */ + unsigned long ni0_aoerr1:1; /* RW */ + unsigned long ni1_aoerr1:1; /* RW */ + unsigned long system_shutdown_int:1; /* RW */ + unsigned 
long lb_irq_int_0:1; /* RW */ + unsigned long lb_irq_int_1:1; /* RW */ + unsigned long lb_irq_int_2:1; /* RW */ + unsigned long lb_irq_int_3:1; /* RW */ + unsigned long lb_irq_int_4:1; /* RW */ + unsigned long lb_irq_int_5:1; /* RW */ + unsigned long lb_irq_int_6:1; /* RW */ + unsigned long lb_irq_int_7:1; /* RW */ + unsigned long lb_irq_int_8:1; /* RW */ + unsigned long lb_irq_int_9:1; /* RW */ + unsigned long lb_irq_int_10:1; /* RW */ + unsigned long lb_irq_int_11:1; /* RW */ + unsigned long lb_irq_int_12:1; /* RW */ + unsigned long lb_irq_int_13:1; /* RW */ + unsigned long lb_irq_int_14:1; /* RW */ + unsigned long lb_irq_int_15:1; /* RW */ + unsigned long l1_nmi_int:1; /* RW */ + unsigned long stop_clock:1; /* RW */ + unsigned long asic_to_l1:1; /* RW */ + unsigned long l1_to_asic:1; /* RW */ + unsigned long la_seq_trigger:1; /* RW */ + unsigned long ipi_int:1; /* RW */ + unsigned long extio_int0:1; /* RW */ + unsigned long extio_int1:1; /* RW */ + unsigned long extio_int2:1; /* RW */ + unsigned long extio_int3:1; /* RW */ + unsigned long profile_int:1; /* RW */ + unsigned long rsvd_59_63:5; + } s2; }; /* ========================================================================= */ /* UVH_EVENT_OCCURRED0_ALIAS */ /* ========================================================================= */ #define UVH_EVENT_OCCURRED0_ALIAS 0x70008UL -#define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0 + + +/* ========================================================================= */ +/* UVH_EVENT_OCCURRED1 */ +/* ========================================================================= */ +#define UVH_EVENT_OCCURRED1 0x70080UL + + + +/* UVYH common defines */ +#define UVYH_EVENT_OCCURRED1_IPI_INT_SHFT 0 +#define UVYH_EVENT_OCCURRED1_IPI_INT_MASK 0x0000000000000001UL +#define UVYH_EVENT_OCCURRED1_EXTIO_INT0_SHFT 1 +#define UVYH_EVENT_OCCURRED1_EXTIO_INT0_MASK 0x0000000000000002UL +#define UVYH_EVENT_OCCURRED1_EXTIO_INT1_SHFT 2 +#define UVYH_EVENT_OCCURRED1_EXTIO_INT1_MASK 0x0000000000000004UL +#define UVYH_EVENT_OCCURRED1_EXTIO_INT2_SHFT 3 +#define UVYH_EVENT_OCCURRED1_EXTIO_INT2_MASK 0x0000000000000008UL +#define UVYH_EVENT_OCCURRED1_EXTIO_INT3_SHFT 4 +#define UVYH_EVENT_OCCURRED1_EXTIO_INT3_MASK 0x0000000000000010UL +#define UVYH_EVENT_OCCURRED1_PROFILE_INT_SHFT 5 +#define UVYH_EVENT_OCCURRED1_PROFILE_INT_MASK 0x0000000000000020UL +#define UVYH_EVENT_OCCURRED1_BAU_DATA_SHFT 6 +#define UVYH_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000040UL +#define UVYH_EVENT_OCCURRED1_PROC_GENERAL_SHFT 7 +#define UVYH_EVENT_OCCURRED1_PROC_GENERAL_MASK 0x0000000000000080UL +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT0_SHFT 8 +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT0_MASK 0x0000000000000100UL +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT1_SHFT 9 +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT1_MASK 0x0000000000000200UL +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT2_SHFT 10 +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT2_MASK 0x0000000000000400UL +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT3_SHFT 11 +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT3_MASK 0x0000000000000800UL +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT4_SHFT 12 +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT4_MASK 0x0000000000001000UL +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT5_SHFT 13 +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT5_MASK 0x0000000000002000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT0_SHFT 14 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT0_MASK 0x0000000000004000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT1_SHFT 15 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT1_MASK 0x0000000000008000UL +#define 
UVYH_EVENT_OCCURRED1_RDM_TLB_INT2_SHFT 16 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT2_MASK 0x0000000000010000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT3_SHFT 17 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT3_MASK 0x0000000000020000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT4_SHFT 18 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT4_MASK 0x0000000000040000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT5_SHFT 19 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT5_MASK 0x0000000000080000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT6_SHFT 20 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT6_MASK 0x0000000000100000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT7_SHFT 21 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT7_MASK 0x0000000000200000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT8_SHFT 22 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT8_MASK 0x0000000000400000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT9_SHFT 23 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT9_MASK 0x0000000000800000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT10_SHFT 24 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT10_MASK 0x0000000001000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT11_SHFT 25 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT11_MASK 0x0000000002000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT12_SHFT 26 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT12_MASK 0x0000000004000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT13_SHFT 27 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT13_MASK 0x0000000008000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT14_SHFT 28 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT14_MASK 0x0000000010000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT15_SHFT 29 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT15_MASK 0x0000000020000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT16_SHFT 30 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT16_MASK 0x0000000040000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT17_SHFT 31 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT17_MASK 0x0000000080000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT18_SHFT 32 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT18_MASK 0x0000000100000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT19_SHFT 33 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT19_MASK 0x0000000200000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT20_SHFT 34 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT20_MASK 0x0000000400000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT21_SHFT 35 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT21_MASK 0x0000000800000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT22_SHFT 36 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT22_MASK 0x0000001000000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT23_SHFT 37 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT23_MASK 0x0000002000000000UL + +/* UV4 unique defines */ +#define UV4H_EVENT_OCCURRED1_PROFILE_INT_SHFT 0 +#define UV4H_EVENT_OCCURRED1_PROFILE_INT_MASK 0x0000000000000001UL +#define UV4H_EVENT_OCCURRED1_BAU_DATA_SHFT 1 +#define UV4H_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000002UL +#define UV4H_EVENT_OCCURRED1_PROC_GENERAL_SHFT 2 +#define UV4H_EVENT_OCCURRED1_PROC_GENERAL_MASK 0x0000000000000004UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT0_SHFT 3 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT0_MASK 0x0000000000000008UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT1_SHFT 4 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT1_MASK 0x0000000000000010UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT2_SHFT 5 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT2_MASK 0x0000000000000020UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT3_SHFT 6 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT3_MASK 0x0000000000000040UL +#define 
UV4H_EVENT_OCCURRED1_GR0_TLB_INT4_SHFT 7 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT4_MASK 0x0000000000000080UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT5_SHFT 8 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT5_MASK 0x0000000000000100UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT6_SHFT 9 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT6_MASK 0x0000000000000200UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT7_SHFT 10 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT7_MASK 0x0000000000000400UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT8_SHFT 11 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT8_MASK 0x0000000000000800UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT9_SHFT 12 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT9_MASK 0x0000000000001000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT10_SHFT 13 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT10_MASK 0x0000000000002000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT11_SHFT 14 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT11_MASK 0x0000000000004000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT12_SHFT 15 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT12_MASK 0x0000000000008000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT13_SHFT 16 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT13_MASK 0x0000000000010000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT14_SHFT 17 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT14_MASK 0x0000000000020000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT15_SHFT 18 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT15_MASK 0x0000000000040000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT16_SHFT 19 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT16_MASK 0x0000000000080000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT17_SHFT 20 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT17_MASK 0x0000000000100000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT18_SHFT 21 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT18_MASK 0x0000000000200000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT19_SHFT 22 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT19_MASK 0x0000000000400000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT20_SHFT 23 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT20_MASK 0x0000000000800000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT21_SHFT 24 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT21_MASK 0x0000000001000000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT22_SHFT 25 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT22_MASK 0x0000000002000000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT23_SHFT 26 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT23_MASK 0x0000000004000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT0_SHFT 27 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT0_MASK 0x0000000008000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT1_SHFT 28 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT1_MASK 0x0000000010000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT2_SHFT 29 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT2_MASK 0x0000000020000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT3_SHFT 30 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT3_MASK 0x0000000040000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT4_SHFT 31 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT4_MASK 0x0000000080000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT5_SHFT 32 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT5_MASK 0x0000000100000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT6_SHFT 33 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT6_MASK 0x0000000200000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT7_SHFT 34 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT7_MASK 0x0000000400000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT8_SHFT 35 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT8_MASK 0x0000000800000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT9_SHFT 36 
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT9_MASK 0x0000001000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT10_SHFT 37 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT10_MASK 0x0000002000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT11_SHFT 38 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT11_MASK 0x0000004000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT12_SHFT 39 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT12_MASK 0x0000008000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT13_SHFT 40 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT13_MASK 0x0000010000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT14_SHFT 41 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT14_MASK 0x0000020000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT15_SHFT 42 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT15_MASK 0x0000040000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT16_SHFT 43 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT16_MASK 0x0000080000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT17_SHFT 44 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT17_MASK 0x0000100000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT18_SHFT 45 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT18_MASK 0x0000200000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT19_SHFT 46 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT19_MASK 0x0000400000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT20_SHFT 47 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT20_MASK 0x0000800000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT21_SHFT 48 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT21_MASK 0x0001000000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT22_SHFT 49 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT22_MASK 0x0002000000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT23_SHFT 50 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT23_MASK 0x0004000000000000UL + +/* UV3 unique defines */ +#define UV3H_EVENT_OCCURRED1_BAU_DATA_SHFT 0 +#define UV3H_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000001UL +#define UV3H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_SHFT 1 +#define UV3H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_MASK 0x0000000000000002UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_SHFT 2 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000004UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_SHFT 3 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000008UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_SHFT 4 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000010UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_SHFT 5 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000020UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_SHFT 6 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000040UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_SHFT 7 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000080UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_SHFT 8 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000100UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_SHFT 9 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000200UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_SHFT 10 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000400UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_SHFT 11 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000800UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_SHFT 12 
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000001000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_SHFT 13
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000002000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_SHFT 14
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000004000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_SHFT 15
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000008000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_SHFT 16
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000010000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_SHFT 17
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000020000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT0_SHFT 18
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT0_MASK 0x0000000000040000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT1_SHFT 19
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT1_MASK 0x0000000000080000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT2_SHFT 20
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT2_MASK 0x0000000000100000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT3_SHFT 21
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT3_MASK 0x0000000000200000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT4_SHFT 22
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT4_MASK 0x0000000000400000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT5_SHFT 23
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT5_MASK 0x0000000000800000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT6_SHFT 24
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT6_MASK 0x0000000001000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT7_SHFT 25
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT7_MASK 0x0000000002000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT8_SHFT 26
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT8_MASK 0x0000000004000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT9_SHFT 27
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT9_MASK 0x0000000008000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT10_SHFT 28
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT10_MASK 0x0000000010000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT11_SHFT 29
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT11_MASK 0x0000000020000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT12_SHFT 30
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT12_MASK 0x0000000040000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT13_SHFT 31
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT13_MASK 0x0000000080000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT14_SHFT 32
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT14_MASK 0x0000000100000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT15_SHFT 33
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT15_MASK 0x0000000200000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT0_SHFT 34
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT0_MASK 0x0000000400000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT1_SHFT 35
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT1_MASK 0x0000000800000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT2_SHFT 36
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT2_MASK 0x0000001000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT3_SHFT 37
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT3_MASK 0x0000002000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT4_SHFT 38
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT4_MASK 0x0000004000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT5_SHFT 39
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT5_MASK 0x0000008000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT6_SHFT 40
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT6_MASK 0x0000010000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT7_SHFT 41
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT7_MASK 0x0000020000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT8_SHFT 42
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT8_MASK 0x0000040000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT9_SHFT 43
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT9_MASK 0x0000080000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT10_SHFT 44
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT10_MASK 0x0000100000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT11_SHFT 45
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT11_MASK 0x0000200000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT12_SHFT 46
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT12_MASK 0x0000400000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT13_SHFT 47
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT13_MASK 0x0000800000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT14_SHFT 48
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT14_MASK 0x0001000000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT15_SHFT 49
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT15_MASK 0x0002000000000000UL
+#define UV3H_EVENT_OCCURRED1_RTC_INTERVAL_INT_SHFT 50
+#define UV3H_EVENT_OCCURRED1_RTC_INTERVAL_INT_MASK 0x0004000000000000UL
+#define UV3H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_SHFT 51
+#define UV3H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_MASK 0x0008000000000000UL
+
+/* UV2 unique defines */
+#define UV2H_EVENT_OCCURRED1_BAU_DATA_SHFT 0
+#define UV2H_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000001UL
+#define UV2H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_SHFT 1
+#define UV2H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_MASK 0x0000000000000002UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_SHFT 2
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000004UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_SHFT 3
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000008UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_SHFT 4
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000010UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_SHFT 5
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000020UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_SHFT 6
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000040UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_SHFT 7
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000080UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_SHFT 8
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000100UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_SHFT 9
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000200UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_SHFT 10
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000400UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_SHFT 11
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000800UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_SHFT 12
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000001000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_SHFT 13
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000002000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_SHFT 14
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000004000UL
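/*
 * Illustrative aside, not part of this patch: every _MASK in these
 * tables is 1UL << _SHFT, so a pending event bit is tested by masking
 * the raw EVENT_OCCURRED1 MMR value and shifting it down.  The helper
 * name is hypothetical; only the UV2H_* macros come from this header.
 */
static inline int uv2_msg_accel_int12_pending(unsigned long ev1)
{
	return (int)((ev1 & UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_MASK) >>
		     UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_SHFT);
}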
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_SHFT 15
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000008000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_SHFT 16
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000010000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_SHFT 17
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000020000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT0_SHFT 18
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT0_MASK 0x0000000000040000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT1_SHFT 19
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT1_MASK 0x0000000000080000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT2_SHFT 20
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT2_MASK 0x0000000000100000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT3_SHFT 21
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT3_MASK 0x0000000000200000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT4_SHFT 22
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT4_MASK 0x0000000000400000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT5_SHFT 23
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT5_MASK 0x0000000000800000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT6_SHFT 24
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT6_MASK 0x0000000001000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT7_SHFT 25
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT7_MASK 0x0000000002000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT8_SHFT 26
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT8_MASK 0x0000000004000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT9_SHFT 27
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT9_MASK 0x0000000008000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT10_SHFT 28
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT10_MASK 0x0000000010000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT11_SHFT 29
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT11_MASK 0x0000000020000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT12_SHFT 30
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT12_MASK 0x0000000040000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT13_SHFT 31
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT13_MASK 0x0000000080000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT14_SHFT 32
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT14_MASK 0x0000000100000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT15_SHFT 33
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT15_MASK 0x0000000200000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT0_SHFT 34
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT0_MASK 0x0000000400000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT1_SHFT 35
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT1_MASK 0x0000000800000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT2_SHFT 36
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT2_MASK 0x0000001000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT3_SHFT 37
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT3_MASK 0x0000002000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT4_SHFT 38
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT4_MASK 0x0000004000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT5_SHFT 39
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT5_MASK 0x0000008000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT6_SHFT 40
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT6_MASK 0x0000010000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT7_SHFT 41
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT7_MASK 0x0000020000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT8_SHFT 42
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT8_MASK 0x0000040000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT9_SHFT 43
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT9_MASK 0x0000080000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT10_SHFT 44
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT10_MASK 0x0000100000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT11_SHFT 45
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT11_MASK 0x0000200000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT12_SHFT 46
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT12_MASK 0x0000400000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT13_SHFT 47
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT13_MASK 0x0000800000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT14_SHFT 48
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT14_MASK 0x0001000000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT15_SHFT 49
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT15_MASK 0x0002000000000000UL
+#define UV2H_EVENT_OCCURRED1_RTC_INTERVAL_INT_SHFT 50
+#define UV2H_EVENT_OCCURRED1_RTC_INTERVAL_INT_MASK 0x0004000000000000UL
+#define UV2H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_SHFT 51
+#define UV2H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_MASK 0x0008000000000000UL
+
+#define UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK ( \
+	is_uv(UV5) ? 0x0000000000000002UL : \
+	0)
+#define UVH_EVENT_OCCURRED1_EXTIO_INT0_SHFT ( \
+	is_uv(UV5) ? 1 : \
+	-1)
+
+union uvyh_event_occurred1_u {
+	unsigned long v;
+
+	/* UVYH common struct */
+	struct uvyh_event_occurred1_s {
+		unsigned long ipi_int:1; /* RW */
+		unsigned long extio_int0:1; /* RW */
+		unsigned long extio_int1:1; /* RW */
+		unsigned long extio_int2:1; /* RW */
+		unsigned long extio_int3:1; /* RW */
+		unsigned long profile_int:1; /* RW */
+		unsigned long bau_data:1; /* RW */
+		unsigned long proc_general:1; /* RW */
+		unsigned long xh_tlb_int0:1; /* RW */
+		unsigned long xh_tlb_int1:1; /* RW */
+		unsigned long xh_tlb_int2:1; /* RW */
+		unsigned long xh_tlb_int3:1; /* RW */
+		unsigned long xh_tlb_int4:1; /* RW */
+		unsigned long xh_tlb_int5:1; /* RW */
+		unsigned long rdm_tlb_int0:1; /* RW */
+		unsigned long rdm_tlb_int1:1; /* RW */
+		unsigned long rdm_tlb_int2:1; /* RW */
+		unsigned long rdm_tlb_int3:1; /* RW */
+		unsigned long rdm_tlb_int4:1; /* RW */
+		unsigned long rdm_tlb_int5:1; /* RW */
+		unsigned long rdm_tlb_int6:1; /* RW */
+		unsigned long rdm_tlb_int7:1; /* RW */
+		unsigned long rdm_tlb_int8:1; /* RW */
+		unsigned long rdm_tlb_int9:1; /* RW */
+		unsigned long rdm_tlb_int10:1; /* RW */
+		unsigned long rdm_tlb_int11:1; /* RW */
+		unsigned long rdm_tlb_int12:1; /* RW */
+		unsigned long rdm_tlb_int13:1; /* RW */
+		unsigned long rdm_tlb_int14:1; /* RW */
+		unsigned long rdm_tlb_int15:1; /* RW */
+		unsigned long rdm_tlb_int16:1; /* RW */
+		unsigned long rdm_tlb_int17:1; /* RW */
+		unsigned long rdm_tlb_int18:1; /* RW */
+		unsigned long rdm_tlb_int19:1; /* RW */
+		unsigned long rdm_tlb_int20:1; /* RW */
+		unsigned long rdm_tlb_int21:1; /* RW */
+		unsigned long rdm_tlb_int22:1; /* RW */
+		unsigned long rdm_tlb_int23:1; /* RW */
+		unsigned long rsvd_38_63:26;
+	} sy;
+
+	/* UV5 unique struct */
+	struct uv5h_event_occurred1_s {
+		unsigned long ipi_int:1; /* RW */
+		unsigned long extio_int0:1; /* RW */
+		unsigned long extio_int1:1; /* RW */
+		unsigned long extio_int2:1; /* RW */
+		unsigned long extio_int3:1; /* RW */
+		unsigned long profile_int:1; /* RW */
+		unsigned long bau_data:1; /* RW */
+		unsigned long proc_general:1; /* RW */
+		unsigned long xh_tlb_int0:1; /* RW */
+		unsigned long xh_tlb_int1:1; /* RW */
+		unsigned long xh_tlb_int2:1; /* RW */
+		unsigned long xh_tlb_int3:1; /* RW */
+		unsigned long xh_tlb_int4:1; /* RW */
+		unsigned long xh_tlb_int5:1; /* RW */
+		unsigned long rdm_tlb_int0:1; /* RW */
+		unsigned long rdm_tlb_int1:1; /* RW */
+		unsigned long rdm_tlb_int2:1; /* RW */
+		unsigned long rdm_tlb_int3:1; /* RW */
+		unsigned long rdm_tlb_int4:1; /* RW */
+		unsigned long rdm_tlb_int5:1; /* RW */
+		unsigned long rdm_tlb_int6:1; /* RW */
+		unsigned long rdm_tlb_int7:1; /* RW */
+		unsigned long rdm_tlb_int8:1; /* RW */
+		unsigned long rdm_tlb_int9:1; /* RW */
+		unsigned long rdm_tlb_int10:1; /* RW */
+		unsigned long rdm_tlb_int11:1; /* RW */
+		unsigned long rdm_tlb_int12:1; /* RW */
+		unsigned long rdm_tlb_int13:1; /* RW */
+		unsigned long rdm_tlb_int14:1; /* RW */
+		unsigned long rdm_tlb_int15:1; /* RW */
+		unsigned long rdm_tlb_int16:1; /* RW */
+		unsigned long rdm_tlb_int17:1; /* RW */
+		unsigned long rdm_tlb_int18:1; /* RW */
+		unsigned long rdm_tlb_int19:1; /* RW */
+		unsigned long rdm_tlb_int20:1; /* RW */
+		unsigned long rdm_tlb_int21:1; /* RW */
+		unsigned long rdm_tlb_int22:1; /* RW */
+		unsigned long rdm_tlb_int23:1; /* RW */
+		unsigned long rsvd_38_63:26;
+	} s5;
+
+	/* UV4 unique struct */
+	struct uv4h_event_occurred1_s {
+		unsigned long profile_int:1; /* RW */
+		unsigned long bau_data:1; /* RW */
+		unsigned long proc_general:1; /* RW */
+		unsigned long gr0_tlb_int0:1; /* RW */
+		unsigned long gr0_tlb_int1:1; /* RW */
+		unsigned long gr0_tlb_int2:1; /* RW */
+		unsigned long gr0_tlb_int3:1; /* RW */
+		unsigned long gr0_tlb_int4:1; /* RW */
+		unsigned long gr0_tlb_int5:1; /* RW */
+		unsigned long gr0_tlb_int6:1; /* RW */
+		unsigned long gr0_tlb_int7:1; /* RW */
+		unsigned long gr0_tlb_int8:1; /* RW */
+		unsigned long gr0_tlb_int9:1; /* RW */
+		unsigned long gr0_tlb_int10:1; /* RW */
+		unsigned long gr0_tlb_int11:1; /* RW */
+		unsigned long gr0_tlb_int12:1; /* RW */
+		unsigned long gr0_tlb_int13:1; /* RW */
+		unsigned long gr0_tlb_int14:1; /* RW */
+		unsigned long gr0_tlb_int15:1; /* RW */
+		unsigned long gr0_tlb_int16:1; /* RW */
+		unsigned long gr0_tlb_int17:1; /* RW */
+		unsigned long gr0_tlb_int18:1; /* RW */
+		unsigned long gr0_tlb_int19:1; /* RW */
+		unsigned long gr0_tlb_int20:1; /* RW */
+		unsigned long gr0_tlb_int21:1; /* RW */
+		unsigned long gr0_tlb_int22:1; /* RW */
+		unsigned long gr0_tlb_int23:1; /* RW */
+		unsigned long gr1_tlb_int0:1; /* RW */
+		unsigned long gr1_tlb_int1:1; /* RW */
+		unsigned long gr1_tlb_int2:1; /* RW */
+		unsigned long gr1_tlb_int3:1; /* RW */
+		unsigned long gr1_tlb_int4:1; /* RW */
+		unsigned long gr1_tlb_int5:1; /* RW */
+		unsigned long gr1_tlb_int6:1; /* RW */
+		unsigned long gr1_tlb_int7:1; /* RW */
+		unsigned long gr1_tlb_int8:1; /* RW */
+		unsigned long gr1_tlb_int9:1; /* RW */
+		unsigned long gr1_tlb_int10:1; /* RW */
+		unsigned long gr1_tlb_int11:1; /* RW */
+		unsigned long gr1_tlb_int12:1; /* RW */
+		unsigned long gr1_tlb_int13:1; /* RW */
+		unsigned long gr1_tlb_int14:1; /* RW */
+		unsigned long gr1_tlb_int15:1; /* RW */
+		unsigned long gr1_tlb_int16:1; /* RW */
+		unsigned long gr1_tlb_int17:1; /* RW */
+		unsigned long gr1_tlb_int18:1; /* RW */
+		unsigned long gr1_tlb_int19:1; /* RW */
+		unsigned long gr1_tlb_int20:1; /* RW */
+		unsigned long gr1_tlb_int21:1; /* RW */
+		unsigned long gr1_tlb_int22:1; /* RW */
+		unsigned long gr1_tlb_int23:1; /* RW */
+		unsigned long rsvd_51_63:13;
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_event_occurred1_s {
+		unsigned long bau_data:1; /* RW */
+		unsigned long power_management_req:1; /* RW */
+		unsigned long message_accelerator_int0:1; /* RW */
+		unsigned long message_accelerator_int1:1; /* RW */
+		unsigned long message_accelerator_int2:1; /* RW */
+		unsigned long message_accelerator_int3:1; /* RW */
+		unsigned long message_accelerator_int4:1; /* RW */
+		unsigned long message_accelerator_int5:1; /* RW */
+		unsigned long message_accelerator_int6:1; /* RW */
+		unsigned long message_accelerator_int7:1; /* RW */
+		unsigned long message_accelerator_int8:1; /* RW */
+		unsigned long message_accelerator_int9:1; /* RW */
+		unsigned long message_accelerator_int10:1; /* RW */
+		unsigned long message_accelerator_int11:1; /* RW */
+		unsigned long message_accelerator_int12:1; /* RW */
+		unsigned long message_accelerator_int13:1; /* RW */
+		unsigned long message_accelerator_int14:1; /* RW */
+		unsigned long message_accelerator_int15:1; /* RW */
+		unsigned long gr0_tlb_int0:1; /* RW */
+		unsigned long gr0_tlb_int1:1; /* RW */
+		unsigned long gr0_tlb_int2:1; /* RW */
+		unsigned long gr0_tlb_int3:1; /* RW */
+		unsigned long gr0_tlb_int4:1; /* RW */
+		unsigned long gr0_tlb_int5:1; /* RW */
+		unsigned long gr0_tlb_int6:1; /* RW */
+		unsigned long gr0_tlb_int7:1; /* RW */
+		unsigned long gr0_tlb_int8:1; /* RW */
+		unsigned long gr0_tlb_int9:1; /* RW */
+		unsigned long gr0_tlb_int10:1; /* RW */
+		unsigned long gr0_tlb_int11:1; /* RW */
+		unsigned long gr0_tlb_int12:1; /* RW */
+		unsigned long gr0_tlb_int13:1; /* RW */
+		unsigned long gr0_tlb_int14:1; /* RW */
+		unsigned long gr0_tlb_int15:1; /* RW */
+		unsigned long gr1_tlb_int0:1; /* RW */
+		unsigned long gr1_tlb_int1:1; /* RW */
+		unsigned long gr1_tlb_int2:1; /* RW */
+		unsigned long gr1_tlb_int3:1; /* RW */
+		unsigned long gr1_tlb_int4:1; /* RW */
+		unsigned long gr1_tlb_int5:1; /* RW */
+		unsigned long gr1_tlb_int6:1; /* RW */
+		unsigned long gr1_tlb_int7:1; /* RW */
+		unsigned long gr1_tlb_int8:1; /* RW */
+		unsigned long gr1_tlb_int9:1; /* RW */
+		unsigned long gr1_tlb_int10:1; /* RW */
+		unsigned long gr1_tlb_int11:1; /* RW */
+		unsigned long gr1_tlb_int12:1; /* RW */
+		unsigned long gr1_tlb_int13:1; /* RW */
+		unsigned long gr1_tlb_int14:1; /* RW */
+		unsigned long gr1_tlb_int15:1; /* RW */
+		unsigned long rtc_interval_int:1; /* RW */
+		unsigned long bau_dashboard_int:1; /* RW */
+		unsigned long rsvd_52_63:12;
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_event_occurred1_s {
+		unsigned long bau_data:1; /* RW */
+		unsigned long power_management_req:1; /* RW */
+		unsigned long message_accelerator_int0:1; /* RW */
+		unsigned long message_accelerator_int1:1; /* RW */
+		unsigned long message_accelerator_int2:1; /* RW */
+		unsigned long message_accelerator_int3:1; /* RW */
+		unsigned long message_accelerator_int4:1; /* RW */
+		unsigned long message_accelerator_int5:1; /* RW */
+		unsigned long message_accelerator_int6:1; /* RW */
+		unsigned long message_accelerator_int7:1; /* RW */
+		unsigned long message_accelerator_int8:1; /* RW */
+		unsigned long message_accelerator_int9:1; /* RW */
+		unsigned long message_accelerator_int10:1; /* RW */
+		unsigned long message_accelerator_int11:1; /* RW */
+		unsigned long message_accelerator_int12:1; /* RW */
+		unsigned long message_accelerator_int13:1; /* RW */
+		unsigned long message_accelerator_int14:1; /* RW */
+		unsigned long message_accelerator_int15:1; /* RW */
+		unsigned long gr0_tlb_int0:1; /* RW */
+		unsigned long gr0_tlb_int1:1; /* RW */
+		unsigned long gr0_tlb_int2:1; /* RW */
+		unsigned long gr0_tlb_int3:1; /* RW */
+		unsigned long gr0_tlb_int4:1; /* RW */
+		unsigned long gr0_tlb_int5:1; /* RW */
+		unsigned long gr0_tlb_int6:1; /* RW */
+		unsigned long gr0_tlb_int7:1; /* RW */
+		unsigned long gr0_tlb_int8:1; /* RW */
+		unsigned long gr0_tlb_int9:1; /* RW */
+		unsigned long gr0_tlb_int10:1; /* RW */
+		unsigned long gr0_tlb_int11:1; /* RW */
+		unsigned long gr0_tlb_int12:1; /* RW */
+		unsigned long gr0_tlb_int13:1; /* RW */
+		unsigned long gr0_tlb_int14:1; /* RW */
+		unsigned long gr0_tlb_int15:1; /* RW */
+		unsigned long gr1_tlb_int0:1; /* RW */
+		unsigned long gr1_tlb_int1:1; /* RW */
+		unsigned long gr1_tlb_int2:1; /* RW */
+		unsigned long gr1_tlb_int3:1; /* RW */
+		unsigned long gr1_tlb_int4:1; /* RW */
+		unsigned long gr1_tlb_int5:1; /* RW */
+		unsigned long gr1_tlb_int6:1; /* RW */
+		unsigned long gr1_tlb_int7:1; /* RW */
+		unsigned long gr1_tlb_int8:1; /* RW */
+		unsigned long gr1_tlb_int9:1; /* RW */
+		unsigned long gr1_tlb_int10:1; /* RW */
+		unsigned long gr1_tlb_int11:1; /* RW */
+		unsigned long gr1_tlb_int12:1; /* RW */
+		unsigned long gr1_tlb_int13:1; /* RW */
+		unsigned long gr1_tlb_int14:1; /* RW */
+		unsigned long gr1_tlb_int15:1; /* RW */
+		unsigned long rtc_interval_int:1; /* RW */
+		unsigned long bau_dashboard_int:1; /* RW */
+		unsigned long rsvd_52_63:12;
+	} s2;
+};
+
+/* ========================================================================= */
+/* UVH_EVENT_OCCURRED1_ALIAS */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED1_ALIAS 0x70088UL
+
+
+/* ========================================================================= */
+/* UVH_EVENT_OCCURRED2 */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED2 0x70100UL
+
+
+
+/* UVYH common defines */
+#define UVYH_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT 0
+#define UVYH_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK 0x0000000000000001UL
+#define UVYH_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT 1
+#define UVYH_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK 0x0000000000000002UL
+#define UVYH_EVENT_OCCURRED2_RTC_0_SHFT 2
+#define UVYH_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000004UL
+#define UVYH_EVENT_OCCURRED2_RTC_1_SHFT 3
+#define UVYH_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000008UL
+#define UVYH_EVENT_OCCURRED2_RTC_2_SHFT 4
+#define UVYH_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000010UL
+#define UVYH_EVENT_OCCURRED2_RTC_3_SHFT 5
+#define UVYH_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000020UL
+#define UVYH_EVENT_OCCURRED2_RTC_4_SHFT 6
+#define UVYH_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000040UL
+#define UVYH_EVENT_OCCURRED2_RTC_5_SHFT 7
+#define UVYH_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000080UL
+#define UVYH_EVENT_OCCURRED2_RTC_6_SHFT 8
+#define UVYH_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000100UL
+#define UVYH_EVENT_OCCURRED2_RTC_7_SHFT 9
+#define UVYH_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000200UL
+#define UVYH_EVENT_OCCURRED2_RTC_8_SHFT 10
+#define UVYH_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000400UL
+#define UVYH_EVENT_OCCURRED2_RTC_9_SHFT 11
+#define UVYH_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000800UL
+#define UVYH_EVENT_OCCURRED2_RTC_10_SHFT 12
+#define UVYH_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000001000UL
+#define UVYH_EVENT_OCCURRED2_RTC_11_SHFT 13
+#define UVYH_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000002000UL
+#define UVYH_EVENT_OCCURRED2_RTC_12_SHFT 14
+#define UVYH_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000004000UL
+#define UVYH_EVENT_OCCURRED2_RTC_13_SHFT 15
+#define UVYH_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000008000UL
+#define UVYH_EVENT_OCCURRED2_RTC_14_SHFT 16
+#define UVYH_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000010000UL
+#define UVYH_EVENT_OCCURRED2_RTC_15_SHFT 17
+#define UVYH_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000020000UL
+#define UVYH_EVENT_OCCURRED2_RTC_16_SHFT 18
+#define UVYH_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000040000UL
+#define UVYH_EVENT_OCCURRED2_RTC_17_SHFT 19
+#define UVYH_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000080000UL
+#define UVYH_EVENT_OCCURRED2_RTC_18_SHFT 20
+#define UVYH_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000100000UL
+#define UVYH_EVENT_OCCURRED2_RTC_19_SHFT 21
+#define UVYH_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000200000UL
+#define UVYH_EVENT_OCCURRED2_RTC_20_SHFT 22
+#define UVYH_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000400000UL
+#define UVYH_EVENT_OCCURRED2_RTC_21_SHFT 23
+#define UVYH_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000800000UL
+#define UVYH_EVENT_OCCURRED2_RTC_22_SHFT 24
+#define UVYH_EVENT_OCCURRED2_RTC_22_MASK 0x0000000001000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_23_SHFT 25
+#define UVYH_EVENT_OCCURRED2_RTC_23_MASK 0x0000000002000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_24_SHFT 26
+#define UVYH_EVENT_OCCURRED2_RTC_24_MASK 0x0000000004000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_25_SHFT 27
+#define UVYH_EVENT_OCCURRED2_RTC_25_MASK 0x0000000008000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_26_SHFT 28
+#define UVYH_EVENT_OCCURRED2_RTC_26_MASK 0x0000000010000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_27_SHFT 29
+#define UVYH_EVENT_OCCURRED2_RTC_27_MASK 0x0000000020000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_28_SHFT 30
+#define UVYH_EVENT_OCCURRED2_RTC_28_MASK 0x0000000040000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_29_SHFT 31
+#define UVYH_EVENT_OCCURRED2_RTC_29_MASK 0x0000000080000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_30_SHFT 32
+#define UVYH_EVENT_OCCURRED2_RTC_30_MASK 0x0000000100000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_31_SHFT 33
+#define UVYH_EVENT_OCCURRED2_RTC_31_MASK 0x0000000200000000UL
+
+/* UV4 unique defines */
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_SHFT 0
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000001UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_SHFT 1
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000002UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_SHFT 2
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000004UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_SHFT 3
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000008UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_SHFT 4
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000010UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_SHFT 5
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000020UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_SHFT 6
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000040UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_SHFT 7
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000080UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_SHFT 8
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000100UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_SHFT 9
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000200UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_SHFT 10
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000000400UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_SHFT 11
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000000800UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_SHFT 12
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000001000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_SHFT 13
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000002000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_SHFT 14
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000004000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_SHFT 15
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000008000UL
+#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT 16
+#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK 0x0000000000010000UL
+#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT 17
+#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK 0x0000000000020000UL
+#define UV4H_EVENT_OCCURRED2_RTC_0_SHFT 18
+#define UV4H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000040000UL
+#define UV4H_EVENT_OCCURRED2_RTC_1_SHFT 19
+#define UV4H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000080000UL
+#define UV4H_EVENT_OCCURRED2_RTC_2_SHFT 20
+#define UV4H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000100000UL
+#define UV4H_EVENT_OCCURRED2_RTC_3_SHFT 21
+#define UV4H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000200000UL
+#define UV4H_EVENT_OCCURRED2_RTC_4_SHFT 22
+#define UV4H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000400000UL
+#define UV4H_EVENT_OCCURRED2_RTC_5_SHFT 23
+#define UV4H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000800000UL
+#define UV4H_EVENT_OCCURRED2_RTC_6_SHFT 24
+#define UV4H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000001000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_7_SHFT 25
+#define UV4H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000002000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_8_SHFT 26
+#define UV4H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000004000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_9_SHFT 27
+#define UV4H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000008000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_10_SHFT 28
+#define UV4H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000010000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_11_SHFT 29
+#define UV4H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000020000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_12_SHFT 30
+#define UV4H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000040000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_13_SHFT 31
+#define UV4H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000080000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_14_SHFT 32
+#define UV4H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000100000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_15_SHFT 33
+#define UV4H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000200000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_16_SHFT 34
+#define UV4H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000400000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_17_SHFT 35
+#define UV4H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000800000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_18_SHFT 36
+#define UV4H_EVENT_OCCURRED2_RTC_18_MASK 0x0000001000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_19_SHFT 37
+#define UV4H_EVENT_OCCURRED2_RTC_19_MASK 0x0000002000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_20_SHFT 38
+#define UV4H_EVENT_OCCURRED2_RTC_20_MASK 0x0000004000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_21_SHFT 39
+#define UV4H_EVENT_OCCURRED2_RTC_21_MASK 0x0000008000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_22_SHFT 40
+#define UV4H_EVENT_OCCURRED2_RTC_22_MASK 0x0000010000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_23_SHFT 41
+#define UV4H_EVENT_OCCURRED2_RTC_23_MASK 0x0000020000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_24_SHFT 42
+#define UV4H_EVENT_OCCURRED2_RTC_24_MASK 0x0000040000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_25_SHFT 43
+#define UV4H_EVENT_OCCURRED2_RTC_25_MASK 0x0000080000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_26_SHFT 44
+#define UV4H_EVENT_OCCURRED2_RTC_26_MASK 0x0000100000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_27_SHFT 45
+#define UV4H_EVENT_OCCURRED2_RTC_27_MASK 0x0000200000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_28_SHFT 46
+#define UV4H_EVENT_OCCURRED2_RTC_28_MASK 0x0000400000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_29_SHFT 47
+#define UV4H_EVENT_OCCURRED2_RTC_29_MASK 0x0000800000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_30_SHFT 48
+#define UV4H_EVENT_OCCURRED2_RTC_30_MASK 0x0001000000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_31_SHFT 49
+#define UV4H_EVENT_OCCURRED2_RTC_31_MASK 0x0002000000000000UL
+
+/* UV3 unique defines */
+#define UV3H_EVENT_OCCURRED2_RTC_0_SHFT 0
+#define UV3H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
+#define UV3H_EVENT_OCCURRED2_RTC_1_SHFT 1
+#define UV3H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
+#define UV3H_EVENT_OCCURRED2_RTC_2_SHFT 2
+#define UV3H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
+#define UV3H_EVENT_OCCURRED2_RTC_3_SHFT 3
+#define UV3H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
+#define UV3H_EVENT_OCCURRED2_RTC_4_SHFT 4
+#define UV3H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
+#define UV3H_EVENT_OCCURRED2_RTC_5_SHFT 5
+#define UV3H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
+#define UV3H_EVENT_OCCURRED2_RTC_6_SHFT 6
+#define UV3H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
+#define UV3H_EVENT_OCCURRED2_RTC_7_SHFT 7
+#define UV3H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
+#define UV3H_EVENT_OCCURRED2_RTC_8_SHFT 8
+#define UV3H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
+#define UV3H_EVENT_OCCURRED2_RTC_9_SHFT 9
+#define UV3H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
+#define UV3H_EVENT_OCCURRED2_RTC_10_SHFT 10
+#define UV3H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
+#define UV3H_EVENT_OCCURRED2_RTC_11_SHFT 11
+#define UV3H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
+#define UV3H_EVENT_OCCURRED2_RTC_12_SHFT 12
+#define UV3H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
+#define UV3H_EVENT_OCCURRED2_RTC_13_SHFT 13
+#define UV3H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
+#define UV3H_EVENT_OCCURRED2_RTC_14_SHFT 14
+#define UV3H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
+#define UV3H_EVENT_OCCURRED2_RTC_15_SHFT 15
+#define UV3H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
+#define UV3H_EVENT_OCCURRED2_RTC_16_SHFT 16
+#define UV3H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
+#define UV3H_EVENT_OCCURRED2_RTC_17_SHFT 17
+#define UV3H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
+#define UV3H_EVENT_OCCURRED2_RTC_18_SHFT 18
+#define UV3H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
+#define UV3H_EVENT_OCCURRED2_RTC_19_SHFT 19
+#define UV3H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
+#define UV3H_EVENT_OCCURRED2_RTC_20_SHFT 20
+#define UV3H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
+#define UV3H_EVENT_OCCURRED2_RTC_21_SHFT 21
+#define UV3H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
+#define UV3H_EVENT_OCCURRED2_RTC_22_SHFT 22
+#define UV3H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
+#define UV3H_EVENT_OCCURRED2_RTC_23_SHFT 23
+#define UV3H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
+#define UV3H_EVENT_OCCURRED2_RTC_24_SHFT 24
+#define UV3H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_25_SHFT 25
+#define UV3H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_26_SHFT 26
+#define UV3H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_27_SHFT 27
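/*
 * Illustrative aside, not part of this patch: because the tables follow
 * a strict NAME_SHFT/NAME_MASK naming convention, a generic extractor
 * can be token-pasted once instead of open-coding each shift.
 * UV_EXTRACT() is a hypothetical macro shown only to document the
 * convention; it is not defined by this header.
 */
#define UV_EXTRACT(val, name)	(((val) & name##_MASK) >> name##_SHFT)
/* e.g. UV_EXTRACT(ev2, UV4H_EVENT_OCCURRED2_RTC_25) on a UV4 hub */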
+#define UV3H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_28_SHFT 28
+#define UV3H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_29_SHFT 29
+#define UV3H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_30_SHFT 30
+#define UV3H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_31_SHFT 31
+#define UV3H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
+
+/* UV2 unique defines */
+#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0
+#define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
+#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1
+#define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
+#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2
+#define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
+#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3
+#define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
+#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4
+#define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
+#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5
+#define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
+#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6
+#define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
+#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7
+#define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
+#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8
+#define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
+#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9
+#define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
+#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10
+#define UV2H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
+#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11
+#define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
+#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12
+#define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
+#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13
+#define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
+#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14
+#define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
+#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15
+#define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
+#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16
+#define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
+#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17
+#define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
+#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18
+#define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
+#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19
+#define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
+#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20
+#define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
+#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21
+#define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
+#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22
+#define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
+#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23
+#define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
+#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24
+#define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25
+#define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26
+#define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27
+#define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28
+#define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29
+#define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30
+#define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31
+#define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
+
+#define UVH_EVENT_OCCURRED2_RTC_1_MASK ( \
+	is_uv(UV5) ? 0x0000000000000008UL : \
+	is_uv(UV4) ? 0x0000000000080000UL : \
+	is_uv(UV3) ? 0x0000000000000002UL : \
+	is_uv(UV2) ? 0x0000000000000002UL : \
+	0)
+#define UVH_EVENT_OCCURRED2_RTC_1_SHFT ( \
+	is_uv(UV5) ? 3 : \
+	is_uv(UV4) ? 19 : \
+	is_uv(UV3) ? 1 : \
+	is_uv(UV2) ? 1 : \
+	-1)
+
+union uvyh_event_occurred2_u {
+	unsigned long v;
+
+	/* UVYH common struct */
+	struct uvyh_event_occurred2_s {
+		unsigned long rtc_interval_int:1; /* RW */
+		unsigned long bau_dashboard_int:1; /* RW */
+		unsigned long rtc_0:1; /* RW */
+		unsigned long rtc_1:1; /* RW */
+		unsigned long rtc_2:1; /* RW */
+		unsigned long rtc_3:1; /* RW */
+		unsigned long rtc_4:1; /* RW */
+		unsigned long rtc_5:1; /* RW */
+		unsigned long rtc_6:1; /* RW */
+		unsigned long rtc_7:1; /* RW */
+		unsigned long rtc_8:1; /* RW */
+		unsigned long rtc_9:1; /* RW */
+		unsigned long rtc_10:1; /* RW */
+		unsigned long rtc_11:1; /* RW */
+		unsigned long rtc_12:1; /* RW */
+		unsigned long rtc_13:1; /* RW */
+		unsigned long rtc_14:1; /* RW */
+		unsigned long rtc_15:1; /* RW */
+		unsigned long rtc_16:1; /* RW */
+		unsigned long rtc_17:1; /* RW */
+		unsigned long rtc_18:1; /* RW */
+		unsigned long rtc_19:1; /* RW */
+		unsigned long rtc_20:1; /* RW */
+		unsigned long rtc_21:1; /* RW */
+		unsigned long rtc_22:1; /* RW */
+		unsigned long rtc_23:1; /* RW */
+		unsigned long rtc_24:1; /* RW */
+		unsigned long rtc_25:1; /* RW */
+		unsigned long rtc_26:1; /* RW */
+		unsigned long rtc_27:1; /* RW */
+		unsigned long rtc_28:1; /* RW */
+		unsigned long rtc_29:1; /* RW */
+		unsigned long rtc_30:1; /* RW */
+		unsigned long rtc_31:1; /* RW */
+		unsigned long rsvd_34_63:30;
+	} sy;
+
+	/* UV5 unique struct */
+	struct uv5h_event_occurred2_s {
+		unsigned long rtc_interval_int:1; /* RW */
+		unsigned long bau_dashboard_int:1; /* RW */
+		unsigned long rtc_0:1; /* RW */
+		unsigned long rtc_1:1; /* RW */
+		unsigned long rtc_2:1; /* RW */
+		unsigned long rtc_3:1; /* RW */
+		unsigned long rtc_4:1; /* RW */
+		unsigned long rtc_5:1; /* RW */
+		unsigned long rtc_6:1; /* RW */
+		unsigned long rtc_7:1; /* RW */
+		unsigned long rtc_8:1; /* RW */
+		unsigned long rtc_9:1; /* RW */
+		unsigned long rtc_10:1; /* RW */
+		unsigned long rtc_11:1; /* RW */
+		unsigned long rtc_12:1; /* RW */
+		unsigned long rtc_13:1; /* RW */
+		unsigned long rtc_14:1; /* RW */
+		unsigned long rtc_15:1; /* RW */
+		unsigned long rtc_16:1; /* RW */
+		unsigned long rtc_17:1; /* RW */
+		unsigned long rtc_18:1; /* RW */
+		unsigned long rtc_19:1; /* RW */
+		unsigned long rtc_20:1; /* RW */
+		unsigned long rtc_21:1; /* RW */
+		unsigned long rtc_22:1; /* RW */
+		unsigned long rtc_23:1; /* RW */
+		unsigned long rtc_24:1; /* RW */
+		unsigned long rtc_25:1; /* RW */
+		unsigned long rtc_26:1; /* RW */
+		unsigned long rtc_27:1; /* RW */
+		unsigned long rtc_28:1; /* RW */
+		unsigned long rtc_29:1; /* RW */
+		unsigned long rtc_30:1; /* RW */
+		unsigned long rtc_31:1; /* RW */
+		unsigned long rsvd_34_63:30;
+	} s5;
+
+	/* UV4 unique struct */
+	struct uv4h_event_occurred2_s {
+		unsigned long message_accelerator_int0:1; /* RW */
+		unsigned long message_accelerator_int1:1; /* RW */
+		unsigned long message_accelerator_int2:1; /* RW */
+		unsigned long message_accelerator_int3:1; /* RW */
+		unsigned long message_accelerator_int4:1; /* RW */
+		unsigned long message_accelerator_int5:1; /* RW */
+		unsigned long message_accelerator_int6:1; /* RW */
+		unsigned long message_accelerator_int7:1; /* RW */
+		unsigned long message_accelerator_int8:1; /* RW */
+		unsigned long message_accelerator_int9:1; /* RW */
+		unsigned long message_accelerator_int10:1; /* RW */
+		unsigned long message_accelerator_int11:1; /* RW */
+		unsigned long message_accelerator_int12:1; /* RW */
+		unsigned long message_accelerator_int13:1; /* RW */
+		unsigned long message_accelerator_int14:1; /* RW */
+		unsigned long message_accelerator_int15:1; /* RW */
+		unsigned long rtc_interval_int:1; /* RW */
+		unsigned long bau_dashboard_int:1; /* RW */
+		unsigned long rtc_0:1; /* RW */
+		unsigned long rtc_1:1; /* RW */
+		unsigned long rtc_2:1; /* RW */
+		unsigned long rtc_3:1; /* RW */
+		unsigned long rtc_4:1; /* RW */
+		unsigned long rtc_5:1; /* RW */
+		unsigned long rtc_6:1; /* RW */
+		unsigned long rtc_7:1; /* RW */
+		unsigned long rtc_8:1; /* RW */
+		unsigned long rtc_9:1; /* RW */
+		unsigned long rtc_10:1; /* RW */
+		unsigned long rtc_11:1; /* RW */
+		unsigned long rtc_12:1; /* RW */
+		unsigned long rtc_13:1; /* RW */
+		unsigned long rtc_14:1; /* RW */
+		unsigned long rtc_15:1; /* RW */
+		unsigned long rtc_16:1; /* RW */
+		unsigned long rtc_17:1; /* RW */
+		unsigned long rtc_18:1; /* RW */
+		unsigned long rtc_19:1; /* RW */
+		unsigned long rtc_20:1; /* RW */
+		unsigned long rtc_21:1; /* RW */
+		unsigned long rtc_22:1; /* RW */
+		unsigned long rtc_23:1; /* RW */
+		unsigned long rtc_24:1; /* RW */
+		unsigned long rtc_25:1; /* RW */
+		unsigned long rtc_26:1; /* RW */
+		unsigned long rtc_27:1; /* RW */
+		unsigned long rtc_28:1; /* RW */
+		unsigned long rtc_29:1; /* RW */
+		unsigned long rtc_30:1; /* RW */
+		unsigned long rtc_31:1; /* RW */
+		unsigned long rsvd_50_63:14;
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_event_occurred2_s {
+		unsigned long rtc_0:1; /* RW */
+		unsigned long rtc_1:1; /* RW */
+		unsigned long rtc_2:1; /* RW */
+		unsigned long rtc_3:1; /* RW */
+		unsigned long rtc_4:1; /* RW */
+		unsigned long rtc_5:1; /* RW */
+		unsigned long rtc_6:1; /* RW */
+		unsigned long rtc_7:1; /* RW */
+		unsigned long rtc_8:1; /* RW */
+		unsigned long rtc_9:1; /* RW */
+		unsigned long rtc_10:1; /* RW */
+		unsigned long rtc_11:1; /* RW */
+		unsigned long rtc_12:1; /* RW */
+		unsigned long rtc_13:1; /* RW */
+		unsigned long rtc_14:1; /* RW */
+		unsigned long rtc_15:1; /* RW */
+		unsigned long rtc_16:1; /* RW */
+		unsigned long rtc_17:1; /* RW */
+		unsigned long rtc_18:1; /* RW */
+		unsigned long rtc_19:1; /* RW */
+		unsigned long rtc_20:1; /* RW */
+		unsigned long rtc_21:1; /* RW */
+		unsigned long rtc_22:1; /* RW */
+		unsigned long rtc_23:1; /* RW */
+		unsigned long rtc_24:1; /* RW */
+		unsigned long rtc_25:1; /* RW */
+		unsigned long rtc_26:1; /* RW */
+		unsigned long rtc_27:1; /* RW */
+		unsigned long rtc_28:1; /* RW */
+		unsigned long rtc_29:1; /* RW */
+		unsigned long rtc_30:1; /* RW */
+		unsigned long rtc_31:1; /* RW */
+		unsigned long rsvd_32_63:32;
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_event_occurred2_s {
+		unsigned long rtc_0:1; /* RW */
+		unsigned long rtc_1:1; /* RW */
+		unsigned long rtc_2:1; /* RW */
+		unsigned long rtc_3:1; /* RW */
+		unsigned long rtc_4:1; /* RW */
+		unsigned long rtc_5:1; /* RW */
+		unsigned long rtc_6:1; /* RW */
+		unsigned long rtc_7:1; /* RW */
+		unsigned long rtc_8:1; /* RW */
+		unsigned long rtc_9:1; /* RW */
+		unsigned long rtc_10:1; /* RW */
+		unsigned long rtc_11:1; /* RW */
+		unsigned long rtc_12:1; /* RW */
+		unsigned long rtc_13:1; /* RW */
+		unsigned long rtc_14:1; /* RW */
+		unsigned long rtc_15:1; /* RW */
+		unsigned long rtc_16:1; /* RW */
+		unsigned long rtc_17:1; /* RW */
+		unsigned long rtc_18:1; /* RW */
+		unsigned long rtc_19:1; /* RW */
+		unsigned long rtc_20:1; /* RW */
+		unsigned long rtc_21:1; /* RW */
+		unsigned long rtc_22:1; /* RW */
+		unsigned long rtc_23:1; /* RW */
+		unsigned long rtc_24:1; /* RW */
+		unsigned long rtc_25:1; /* RW */
+		unsigned long rtc_26:1; /* RW */
+		unsigned long rtc_27:1; /* RW */
+		unsigned long rtc_28:1; /* RW */
+		unsigned long rtc_29:1; /* RW */
+		unsigned long rtc_30:1; /* RW */
+		unsigned long rtc_31:1; /* RW */
+		unsigned long rsvd_32_63:32;
+	} s2;
+};
+
+/* ========================================================================= */
+/* UVH_EVENT_OCCURRED2_ALIAS */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED2_ALIAS 0x70108UL
 /* ========================================================================= */
@@ -725,53 +2135,148 @@ union uvh_event_occurred0_u {
 /* ========================================================================= */
 #define UVH_EXTIO_INT0_BROADCAST 0x61448UL
-#define UV1H_EXTIO_INT0_BROADCAST_32 0x3f0
-#define UV2H_EXTIO_INT0_BROADCAST_32 0x3f0
-#define UV3H_EXTIO_INT0_BROADCAST_32 0x3f0
-#define UV4H_EXTIO_INT0_BROADCAST_32 0x310
-#define UVH_EXTIO_INT0_BROADCAST_32 ( \
-	is_uv1_hub() ? UV1H_EXTIO_INT0_BROADCAST_32 : \
-	is_uv2_hub() ? UV2H_EXTIO_INT0_BROADCAST_32 : \
-	is_uv3_hub() ? UV3H_EXTIO_INT0_BROADCAST_32 : \
-	/*is_uv4_hub*/ UV4H_EXTIO_INT0_BROADCAST_32)
-
+/* UVH common defines*/
 #define UVH_EXTIO_INT0_BROADCAST_ENABLE_SHFT 0
 #define UVH_EXTIO_INT0_BROADCAST_ENABLE_MASK 0x0000000000000001UL
 union uvh_extio_int0_broadcast_u {
 	unsigned long v;
+
+	/* UVH common struct */
 	struct uvh_extio_int0_broadcast_s {
 		unsigned long enable:1; /* RW */
 		unsigned long rsvd_1_63:63;
 	} s;
+
+	/* UV5 unique struct */
+	struct uv5h_extio_int0_broadcast_s {
+		unsigned long enable:1; /* RW */
+		unsigned long rsvd_1_63:63;
+	} s5;
+
+	/* UV4 unique struct */
+	struct uv4h_extio_int0_broadcast_s {
+		unsigned long enable:1; /* RW */
+		unsigned long rsvd_1_63:63;
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_extio_int0_broadcast_s {
+		unsigned long enable:1; /* RW */
+		unsigned long rsvd_1_63:63;
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_extio_int0_broadcast_s {
+		unsigned long enable:1; /* RW */
+		unsigned long rsvd_1_63:63;
+	} s2;
+};
+
+/* ========================================================================= */
+/* UVH_GR0_GAM_GR_CONFIG */
+/* ========================================================================= */
+#define UVH_GR0_GAM_GR_CONFIG ( \
+	is_uv(UV5) ? 0x600028UL : \
+	is_uv(UV4) ? 0x600028UL : \
+	is_uv(UV3) ? 0xc00028UL : \
+	is_uv(UV2) ? 0xc00028UL : \
+	0)
+
+
+/* UVYH common defines */
+#define UVYH_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT 10
+#define UVYH_GR0_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL
+
+/* UV4 unique defines */
+#define UV4H_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT 10
+#define UV4H_GR0_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL
+
+/* UV3 unique defines */
+#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_SHFT 0
+#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_MASK 0x000000000000003fUL
+#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT 10
+#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL
+
+/* UV2 unique defines */
+#define UV2H_GR0_GAM_GR_CONFIG_N_GR_SHFT 0
+#define UV2H_GR0_GAM_GR_CONFIG_N_GR_MASK 0x000000000000000fUL
+
+
+union uvyh_gr0_gam_gr_config_u {
+	unsigned long v;
+
+	/* UVYH common struct */
+	struct uvyh_gr0_gam_gr_config_s {
+		unsigned long rsvd_0_9:10;
+		unsigned long subspace:1; /* RW */
+		unsigned long rsvd_11_63:53;
+	} sy;
+
+	/* UV5 unique struct */
+	struct uv5h_gr0_gam_gr_config_s {
+		unsigned long rsvd_0_9:10;
+		unsigned long subspace:1; /* RW */
+		unsigned long rsvd_11_63:53;
+	} s5;
+
+	/* UV4 unique struct */
+	struct uv4h_gr0_gam_gr_config_s {
+		unsigned long rsvd_0_9:10;
+		unsigned long subspace:1; /* RW */
+		unsigned long rsvd_11_63:53;
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_gr0_gam_gr_config_s {
+		unsigned long m_skt:6; /* RW */
+		unsigned long undef_6_9:4; /* Undefined */
+		unsigned long subspace:1; /* RW */
+		unsigned long reserved:53;
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_gr0_gam_gr_config_s {
+		unsigned long n_gr:4; /* RW */
+		unsigned long reserved:60;
+	} s2;
+};
 /* ========================================================================= */
 /* UVH_GR0_TLB_INT0_CONFIG */
 /* ========================================================================= */
-#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL
-
-#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0
-#define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8
-#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11
-#define UVH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12
-#define UVH_GR0_TLB_INT0_CONFIG_P_SHFT 13
-#define UVH_GR0_TLB_INT0_CONFIG_T_SHFT 15
-#define UVH_GR0_TLB_INT0_CONFIG_M_SHFT 16
-#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32
-#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
-#define UVH_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
-#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
-#define UVH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
-#define UVH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
-#define UVH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
-#define UVH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
-#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+#define UVH_GR0_TLB_INT0_CONFIG ( \
+	is_uv(UV4) ? 0x61b00UL : \
+	is_uv(UV3) ? 0x61b00UL : \
+	is_uv(UV2) ? 0x61b00UL : \
+	uv_undefined("UVH_GR0_TLB_INT0_CONFIG"))
+
+
+/* UVXH common defines */
+#define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0
+#define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVXH_GR0_TLB_INT0_CONFIG_DM_SHFT 8
+#define UVXH_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVXH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11
+#define UVXH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVXH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12
+#define UVXH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_P_SHFT 13
+#define UVXH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_T_SHFT 15
+#define UVXH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_M_SHFT 16
+#define UVXH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32
+#define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
 union uvh_gr0_tlb_int0_config_u {
 	unsigned long v;
+
+	/* UVH common struct */
 	struct uvh_gr0_tlb_int0_config_s {
 		unsigned long vector_:8; /* RW */
 		unsigned long dm:3; /* RW */
@@ -784,33 +2289,97 @@ union uvh_gr0_tlb_int0_config_u {
 		unsigned long rsvd_17_31:15;
 		unsigned long apic_id:32; /* RW */
 	} s;
+
+	/* UVXH common struct */
+	struct uvxh_gr0_tlb_int0_config_s {
+		unsigned long vector_:8; /* RW */
+		unsigned long dm:3; /* RW */
+		unsigned long destmode:1; /* RW */
+		unsigned long status:1; /* RO */
+		unsigned long p:1; /* RO */
+		unsigned long rsvd_14:1;
+		unsigned long t:1; /* RO */
+		unsigned long m:1; /* RW */
+		unsigned long rsvd_17_31:15;
+		unsigned long apic_id:32; /* RW */
+	} sx;
+
+	/* UV4 unique struct */
+	struct uv4h_gr0_tlb_int0_config_s {
+		unsigned long vector_:8; /* RW */
+		unsigned long dm:3; /* RW */
+		unsigned long destmode:1; /* RW */
+		unsigned long status:1; /* RO */
+		unsigned long p:1; /* RO */
+		unsigned long rsvd_14:1;
+		unsigned long t:1; /* RO */
+		unsigned long m:1; /* RW */
+		unsigned long rsvd_17_31:15;
+		unsigned long apic_id:32; /* RW */
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_gr0_tlb_int0_config_s {
+		unsigned long vector_:8; /* RW */
+		unsigned long dm:3; /* RW */
+		unsigned long destmode:1; /* RW */
+		unsigned long status:1; /* RO */
+		unsigned long p:1; /* RO */
+		unsigned long rsvd_14:1;
+		unsigned long t:1; /* RO */
+		unsigned long m:1; /* RW */
+		unsigned long rsvd_17_31:15;
+		unsigned long apic_id:32; /* RW */
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_gr0_tlb_int0_config_s {
+		unsigned long vector_:8; /* RW */
+		unsigned long dm:3; /* RW */
+		unsigned long destmode:1; /* RW */
+		unsigned long status:1; /* RO */
+		unsigned long p:1; /* RO */
+		unsigned long rsvd_14:1;
+		unsigned long t:1; /* RO */
+		unsigned long m:1; /* RW */
+		unsigned long rsvd_17_31:15;
+		unsigned long apic_id:32; /* RW */
+	} s2;
 };
 /* ========================================================================= */
 /* UVH_GR0_TLB_INT1_CONFIG */
 /* ========================================================================= */
-#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL
-
-#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0
-#define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8
-#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11
-#define UVH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12
-#define UVH_GR0_TLB_INT1_CONFIG_P_SHFT 13
-#define UVH_GR0_TLB_INT1_CONFIG_T_SHFT 15
-#define UVH_GR0_TLB_INT1_CONFIG_M_SHFT 16
-#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32
-#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
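/*
 * Illustrative aside, not part of this patch: the *_config unions are
 * typically programmed by filling the bitfields and writing the .v
 * overlay to the MMR.  uv_write_global_mmr64() is the existing accessor
 * from <asm/uv/uv_hub.h>; the helper below and its arguments are
 * hypothetical.
 */
static inline void uv_example_program_tlb_int0(int pnode, u8 vector, u32 apicid)
{
	union uvh_gr0_tlb_int0_config_u cfg = { .v = 0UL };

	cfg.s.vector_ = vector;	/* interrupt vector to deliver */
	cfg.s.apic_id = apicid;	/* destination APIC id */
	uv_write_global_mmr64(pnode, UVH_GR0_TLB_INT0_CONFIG, cfg.v);
}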
-#define UVH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL -#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL -#define UVH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL -#define UVH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL -#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL +#define UVH_GR0_TLB_INT1_CONFIG ( \ + is_uv(UV4) ? 0x61b40UL : \ + is_uv(UV3) ? 0x61b40UL : \ + is_uv(UV2) ? 0x61b40UL : \ + uv_undefined("UVH_GR0_TLB_INT1_CONFIG")) + + +/* UVXH common defines */ +#define UVXH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 +#define UVXH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVXH_GR0_TLB_INT1_CONFIG_DM_SHFT 8 +#define UVXH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL +#define UVXH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11 +#define UVXH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVXH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12 +#define UVXH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVXH_GR0_TLB_INT1_CONFIG_P_SHFT 13 +#define UVXH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL +#define UVXH_GR0_TLB_INT1_CONFIG_T_SHFT 15 +#define UVXH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL +#define UVXH_GR0_TLB_INT1_CONFIG_M_SHFT 16 +#define UVXH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL +#define UVXH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32 +#define UVXH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL union uvh_gr0_tlb_int1_config_u { unsigned long v; + + /* UVH common struct */ struct uvh_gr0_tlb_int1_config_s { unsigned long vector_:8; /* RW */ unsigned long dm:3; /* RW */ @@ -823,466 +2392,97 @@ union uvh_gr0_tlb_int1_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; -}; -/* ========================================================================= */ -/* UVH_GR0_TLB_MMR_CONTROL */ -/* ========================================================================= */ -#define UV1H_GR0_TLB_MMR_CONTROL 0x401080UL -#define UV2H_GR0_TLB_MMR_CONTROL 0xc01080UL -#define UV3H_GR0_TLB_MMR_CONTROL 0xc01080UL -#define UV4H_GR0_TLB_MMR_CONTROL 0x601080UL -#define UVH_GR0_TLB_MMR_CONTROL ( \ - is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL : \ - is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL : \ - is_uv3_hub() ? 
UV3H_GR0_TLB_MMR_CONTROL : \ - /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL) - -#define UVH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UVH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UVH_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL - -#define UV1H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 -#define UV1H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT 48 -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT 52 -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBPGSIZE_SHFT 54 -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_SHFT 56 -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_SHFT 60 -#define UV1H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL -#define UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL -#define UV1H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBPGSIZE_MASK 0x0040000000000000UL -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL - -#define UVXH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 -#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL - -#define UV2H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 -#define UV2H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT 48 -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT 52 -#define UV2H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL -#define UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL -#define UV2H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL 
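
The _SHFT/_MASK pairs throughout this header are consumed with the usual mask-then-shift idiom. A small self-contained sketch using the UVXH_GR0_TLB_INT0_CONFIG values retained above (the register value is a made-up sample; nothing is read from hardware):

        #include <stdio.h>

        /* values copied from the UVXH common defines above */
        #define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT   0
        #define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_MASK   0x00000000000000ffUL
        #define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT  32
        #define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK  0xffffffff00000000UL

        int main(void)
        {
                unsigned long v = 0x0000002a000000f3UL;  /* hypothetical MMR value */

                unsigned long vector  = (v & UVXH_GR0_TLB_INT0_CONFIG_VECTOR_MASK)
                                        >> UVXH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT;
                unsigned long apic_id = (v & UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK)
                                        >> UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT;

                printf("vector=%#lx apic_id=%#lx\n", vector, apic_id);
                return 0;
        }

This prints vector=0xf3 apic_id=0x2a for the sample value, matching the bit positions the masks describe.
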
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL - -#define UV3H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 -#define UV3H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UV3H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UV3H_GR0_TLB_MMR_CONTROL_ECC_SEL_SHFT 21 -#define UV3H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 -#define UV3H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL -#define UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL -#define UV3H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UV3H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UV3H_GR0_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL -#define UV3H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL - -#define UV4H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 13 -#define UV4H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UV4H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UV4H_GR0_TLB_MMR_CONTROL_ECC_SEL_SHFT 21 -#define UV4H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UV4H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UV4H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 -#define UV4H_GR0_TLB_MMR_CONTROL_PAGE_SIZE_SHFT 59 -#define UV4H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000001fffUL -#define UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000006000UL -#define UV4H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UV4H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UV4H_GR0_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL -#define UV4H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UV4H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UV4H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL -#define UV4H_GR0_TLB_MMR_CONTROL_PAGE_SIZE_MASK 0xf800000000000000UL - -#define UVH_GR0_TLB_MMR_CONTROL_INDEX_MASK ( \ - is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL_INDEX_MASK : \ - is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_INDEX_MASK : \ - is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_INDEX_MASK : \ - /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_INDEX_MASK) -#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK ( \ - is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK : \ - is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK : \ - is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK : \ - /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK) -#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT ( \ - is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT : \ - is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT : \ - is_uv3_hub() ? 
UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT : \ - /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT) - -union uvh_gr0_tlb_mmr_control_u { - unsigned long v; - struct uvh_gr0_tlb_mmr_control_s { - unsigned long rsvd_0_15:16; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long rsvd_21_29:9; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long rsvd_32_48:17; - unsigned long rsvd_49_51:3; - unsigned long rsvd_52_63:12; - } s; - struct uv1h_gr0_tlb_mmr_control_s { - unsigned long index:12; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_14_15:2; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long rsvd_21_29:9; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long rsvd_32_47:16; - unsigned long mmr_inj_con:1; /* RW */ - unsigned long rsvd_49_51:3; - unsigned long mmr_inj_tlbram:1; /* RW */ - unsigned long rsvd_53:1; - unsigned long mmr_inj_tlbpgsize:1; /* RW */ - unsigned long rsvd_55:1; - unsigned long mmr_inj_tlbrreg:1; /* RW */ - unsigned long rsvd_57_59:3; - unsigned long mmr_inj_tlblruv:1; /* RW */ - unsigned long rsvd_61_63:3; - } s1; - struct uvxh_gr0_tlb_mmr_control_s { - unsigned long rsvd_0_15:16; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long rsvd_21_29:9; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long mmr_op_done:1; /* RW */ - unsigned long rsvd_33_47:15; - unsigned long rsvd_48:1; - unsigned long rsvd_49_51:3; - unsigned long rsvd_52_63:12; + /* UVXH common struct */ + struct uvxh_gr0_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ } sx; - struct uv2h_gr0_tlb_mmr_control_s { - unsigned long index:12; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_14_15:2; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long rsvd_21_29:9; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long mmr_op_done:1; /* RW */ - unsigned long rsvd_33_47:15; - unsigned long mmr_inj_con:1; /* RW */ - unsigned long rsvd_49_51:3; - unsigned long mmr_inj_tlbram:1; /* RW */ - unsigned long rsvd_53_63:11; - } s2; - struct uv3h_gr0_tlb_mmr_control_s { - unsigned long index:12; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_14_15:2; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long ecc_sel:1; /* RW */ - unsigned long rsvd_22_29:8; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long mmr_op_done:1; /* RW */ - unsigned long rsvd_33_47:15; - unsigned long undef_48:1; /* Undefined */ - unsigned long rsvd_49_51:3; - unsigned long undef_52:1; /* Undefined */ - unsigned long rsvd_53_63:11; - } s3; - struct uv4h_gr0_tlb_mmr_control_s { - unsigned long index:13; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_15:1; - unsigned long auto_valid_en:1; /* RW */ - unsigned long 
rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long ecc_sel:1; /* RW */ - unsigned long rsvd_22_29:8; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long mmr_op_done:1; /* RW */ - unsigned long rsvd_33_47:15; - unsigned long undef_48:1; /* Undefined */ - unsigned long rsvd_49_51:3; - unsigned long rsvd_52_58:7; - unsigned long page_size:5; /* RW */ + + /* UV4 unique struct */ + struct uv4h_gr0_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ } s4; -}; -/* ========================================================================= */ -/* UVH_GR0_TLB_MMR_READ_DATA_HI */ -/* ========================================================================= */ -#define UV1H_GR0_TLB_MMR_READ_DATA_HI 0x4010a0UL -#define UV2H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL -#define UV3H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL -#define UV4H_GR0_TLB_MMR_READ_DATA_HI 0x6010a0UL -#define UVH_GR0_TLB_MMR_READ_DATA_HI ( \ - is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_HI : \ - is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_HI : \ - is_uv3_hub() ? UV3H_GR0_TLB_MMR_READ_DATA_HI : \ - /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_READ_DATA_HI) - -#define UVH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 - -#define UV1H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UV1H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 -#define UV1H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 -#define UV1H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 -#define UV1H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL -#define UV1H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL -#define UV1H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL -#define UV1H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL - -#define UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 - -#define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 -#define UV2H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 -#define UV2H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 -#define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL -#define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL -#define UV2H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL -#define UV2H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL - -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 45 -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55 -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL - -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PNID_SHFT 34 -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 49 -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 51 -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 52 
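
The unions in this header offer a second access path: the raw 64-bit value and the per-hub bitfield views alias each other. A sketch of that decode style, with the struct copied from the uvh_gr0_tlb_int0_config_s layout above (the sample value is invented, and LSB-first bitfield layout is assumed, as it is on x86):

        #include <stdio.h>

        union gr0_tlb_int0_config_u {
                unsigned long v;
                struct {                        /* layout per uvh_gr0_tlb_int0_config_s */
                        unsigned long vector_:8;        /* RW */
                        unsigned long dm:3;             /* RW */
                        unsigned long destmode:1;       /* RW */
                        unsigned long status:1;         /* RO */
                        unsigned long p:1;              /* RO */
                        unsigned long rsvd_14:1;
                        unsigned long t:1;              /* RO */
                        unsigned long m:1;              /* RW */
                        unsigned long rsvd_17_31:15;
                        unsigned long apic_id:32;       /* RW */
                } s;
        };

        int main(void)
        {
                union gr0_tlb_int0_config_u cfg = { .v = 0x0000002a000100f3UL };

                printf("vector=%#lx m=%lu apic_id=%#lx\n",
                       (unsigned long)cfg.s.vector_,
                       (unsigned long)cfg.s.m,
                       (unsigned long)cfg.s.apic_id);
                return 0;
        }

The patch's new sx/s2/s3/s4 members follow the same aliasing scheme, one view per hub generation plus the common views.
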
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 53 -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55 -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x00000003ffffffffUL -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PNID_MASK 0x0001fffc00000000UL -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0006000000000000UL -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0008000000000000UL -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0010000000000000UL -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0020000000000000UL -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL - - -union uvh_gr0_tlb_mmr_read_data_hi_u { - unsigned long v; - struct uv1h_gr0_tlb_mmr_read_data_hi_s { - unsigned long pfn:41; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long larger:1; /* RO */ - unsigned long rsvd_45_63:19; - } s1; - struct uv2h_gr0_tlb_mmr_read_data_hi_s { - unsigned long pfn:41; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long larger:1; /* RO */ - unsigned long rsvd_45_63:19; - } s2; - struct uv3h_gr0_tlb_mmr_read_data_hi_s { - unsigned long pfn:41; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long larger:1; /* RO */ - unsigned long aa_ext:1; /* RO */ - unsigned long undef_46_54:9; /* Undefined */ - unsigned long way_ecc:9; /* RO */ + /* UV3 unique struct */ + struct uv3h_gr0_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ } s3; - struct uv4h_gr0_tlb_mmr_read_data_hi_s { - unsigned long pfn:34; /* RO */ - unsigned long pnid:15; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long larger:1; /* RO */ - unsigned long aa_ext:1; /* RO */ - unsigned long undef_54:1; /* Undefined */ - unsigned long way_ecc:9; /* RO */ - } s4; -}; -/* ========================================================================= */ -/* UVH_GR0_TLB_MMR_READ_DATA_LO */ -/* ========================================================================= */ -#define UV1H_GR0_TLB_MMR_READ_DATA_LO 0x4010a8UL -#define UV2H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL -#define UV3H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL -#define UV4H_GR0_TLB_MMR_READ_DATA_LO 0x6010a8UL -#define UVH_GR0_TLB_MMR_READ_DATA_LO ( \ - is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_LO : \ - is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_LO : \ - is_uv3_hub() ? 
UV3H_GR0_TLB_MMR_READ_DATA_LO : \ - /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_READ_DATA_LO) - -#define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UVH_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UVH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UV1H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UV1H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UVXH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UVXH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UV2H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UV2H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UV4H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UV4H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - - -union uvh_gr0_tlb_mmr_read_data_lo_u { - unsigned long v; - struct uvh_gr0_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } s; - struct uv1h_gr0_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } s1; - struct uvxh_gr0_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } sx; - struct uv2h_gr0_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ + /* UV2 unique struct */ + struct uv2h_gr0_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ } s2; - struct uv3h_gr0_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } s3; - struct uv4h_gr0_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* 
RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } s4; }; /* ========================================================================= */ /* UVH_GR1_TLB_INT0_CONFIG */ /* ========================================================================= */ -#define UV1H_GR1_TLB_INT0_CONFIG 0x61f00UL -#define UV2H_GR1_TLB_INT0_CONFIG 0x61f00UL -#define UV3H_GR1_TLB_INT0_CONFIG 0x61f00UL -#define UV4H_GR1_TLB_INT0_CONFIG 0x62100UL #define UVH_GR1_TLB_INT0_CONFIG ( \ - is_uv1_hub() ? UV1H_GR1_TLB_INT0_CONFIG : \ - is_uv2_hub() ? UV2H_GR1_TLB_INT0_CONFIG : \ - is_uv3_hub() ? UV3H_GR1_TLB_INT0_CONFIG : \ - /*is_uv4_hub*/ UV4H_GR1_TLB_INT0_CONFIG) - -#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 -#define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8 -#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11 -#define UVH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12 -#define UVH_GR1_TLB_INT0_CONFIG_P_SHFT 13 -#define UVH_GR1_TLB_INT0_CONFIG_T_SHFT 15 -#define UVH_GR1_TLB_INT0_CONFIG_M_SHFT 16 -#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32 -#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL -#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL -#define UVH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL -#define UVH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL -#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + is_uv(UV4) ? 0x62100UL : \ + is_uv(UV3) ? 0x61f00UL : \ + is_uv(UV2) ? 0x61f00UL : \ + uv_undefined("UVH_GR1_TLB_INT0_CONFIG")) + + +/* UVXH common defines */ +#define UVXH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 +#define UVXH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVXH_GR1_TLB_INT0_CONFIG_DM_SHFT 8 +#define UVXH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL +#define UVXH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11 +#define UVXH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVXH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12 +#define UVXH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVXH_GR1_TLB_INT0_CONFIG_P_SHFT 13 +#define UVXH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UVXH_GR1_TLB_INT0_CONFIG_T_SHFT 15 +#define UVXH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UVXH_GR1_TLB_INT0_CONFIG_M_SHFT 16 +#define UVXH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UVXH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32 +#define UVXH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL union uvh_gr1_tlb_int0_config_u { unsigned long v; + + /* UVH common struct */ struct uvh_gr1_tlb_int0_config_s { unsigned long vector_:8; /* RW */ unsigned long dm:3; /* RW */ @@ -1295,41 +2495,97 @@ union uvh_gr1_tlb_int0_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; + + /* UVXH common struct */ + struct uvxh_gr1_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } sx; + + /* UV4 unique struct */ + struct uv4h_gr1_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned 
long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_gr1_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s3; + + /* UV2 unique struct */ + struct uv2h_gr1_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s2; }; /* ========================================================================= */ /* UVH_GR1_TLB_INT1_CONFIG */ /* ========================================================================= */ -#define UV1H_GR1_TLB_INT1_CONFIG 0x61f40UL -#define UV2H_GR1_TLB_INT1_CONFIG 0x61f40UL -#define UV3H_GR1_TLB_INT1_CONFIG 0x61f40UL -#define UV4H_GR1_TLB_INT1_CONFIG 0x62140UL #define UVH_GR1_TLB_INT1_CONFIG ( \ - is_uv1_hub() ? UV1H_GR1_TLB_INT1_CONFIG : \ - is_uv2_hub() ? UV2H_GR1_TLB_INT1_CONFIG : \ - is_uv3_hub() ? UV3H_GR1_TLB_INT1_CONFIG : \ - /*is_uv4_hub*/ UV4H_GR1_TLB_INT1_CONFIG) - -#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0 -#define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8 -#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11 -#define UVH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12 -#define UVH_GR1_TLB_INT1_CONFIG_P_SHFT 13 -#define UVH_GR1_TLB_INT1_CONFIG_T_SHFT 15 -#define UVH_GR1_TLB_INT1_CONFIG_M_SHFT 16 -#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32 -#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL -#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL -#define UVH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL -#define UVH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL -#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + is_uv(UV4) ? 0x62140UL : \ + is_uv(UV3) ? 0x61f40UL : \ + is_uv(UV2) ? 
0x61f40UL : \ + uv_undefined("UVH_GR1_TLB_INT1_CONFIG")) + + +/* UVXH common defines */ +#define UVXH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0 +#define UVXH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVXH_GR1_TLB_INT1_CONFIG_DM_SHFT 8 +#define UVXH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL +#define UVXH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11 +#define UVXH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVXH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12 +#define UVXH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVXH_GR1_TLB_INT1_CONFIG_P_SHFT 13 +#define UVXH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL +#define UVXH_GR1_TLB_INT1_CONFIG_T_SHFT 15 +#define UVXH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL +#define UVXH_GR1_TLB_INT1_CONFIG_M_SHFT 16 +#define UVXH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL +#define UVXH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32 +#define UVXH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL union uvh_gr1_tlb_int1_config_u { unsigned long v; + + /* UVH common struct */ struct uvh_gr1_tlb_int1_config_s { unsigned long vector_:8; /* RW */ unsigned long dm:3; /* RW */ @@ -1342,416 +2598,62 @@ union uvh_gr1_tlb_int1_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; -}; -/* ========================================================================= */ -/* UVH_GR1_TLB_MMR_CONTROL */ -/* ========================================================================= */ -#define UV1H_GR1_TLB_MMR_CONTROL 0x801080UL -#define UV2H_GR1_TLB_MMR_CONTROL 0x1001080UL -#define UV3H_GR1_TLB_MMR_CONTROL 0x1001080UL -#define UV4H_GR1_TLB_MMR_CONTROL 0x701080UL -#define UVH_GR1_TLB_MMR_CONTROL ( \ - is_uv1_hub() ? UV1H_GR1_TLB_MMR_CONTROL : \ - is_uv2_hub() ? UV2H_GR1_TLB_MMR_CONTROL : \ - is_uv3_hub() ? 
UV3H_GR1_TLB_MMR_CONTROL : \ - /*is_uv4_hub*/ UV4H_GR1_TLB_MMR_CONTROL) - -#define UVH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UVH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UVH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UVH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UVH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UVH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UVH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UVH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UVH_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL - -#define UV1H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UV1H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 -#define UV1H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT 48 -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT 52 -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBPGSIZE_SHFT 54 -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_SHFT 56 -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_SHFT 60 -#define UV1H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL -#define UV1H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL -#define UV1H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBPGSIZE_MASK 0x0040000000000000UL -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL - -#define UVXH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 -#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL - -#define UV2H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 -#define UV2H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT 48 -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT 52 -#define UV2H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL -#define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL -#define UV2H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL 
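
Writes go the other way: a control word is composed field by field before being stored. A sketch built from the removed UV2H_GR1_TLB_MMR_CONTROL defines in this hunk (shifts and masks copied verbatim; the index and mem_sel values are made-up samples, and no MMR is actually written):

        #include <stdio.h>

        /* values copied from the UV2H defines above */
        #define UV2H_GR1_TLB_MMR_CONTROL_INDEX_SHFT     0
        #define UV2H_GR1_TLB_MMR_CONTROL_INDEX_MASK     0x0000000000000fffUL
        #define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT   12
        #define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK   0x0000000000003000UL
        #define UV2H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT  31

        int main(void)
        {
                unsigned long index = 0x123, mem_sel = 1;       /* sample inputs */
                unsigned long v = 0;

                v |= (index << UV2H_GR1_TLB_MMR_CONTROL_INDEX_SHFT)
                     & UV2H_GR1_TLB_MMR_CONTROL_INDEX_MASK;
                v |= (mem_sel << UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT)
                     & UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK;
                v |= 1UL << UV2H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT; /* request a read op */

                printf("control word: %#lx\n", v);
                return 0;
        }

Masking after the shift keeps an out-of-range input from spilling into neighboring fields, which is why the header supplies both the _SHFT and the _MASK for every field.
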
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL - -#define UV3H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UV3H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 -#define UV3H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UV3H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UV3H_GR1_TLB_MMR_CONTROL_ECC_SEL_SHFT 21 -#define UV3H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 -#define UV3H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL -#define UV3H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL -#define UV3H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UV3H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UV3H_GR1_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL -#define UV3H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL - -#define UV4H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UV4H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 13 -#define UV4H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UV4H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UV4H_GR1_TLB_MMR_CONTROL_ECC_SEL_SHFT 21 -#define UV4H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UV4H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UV4H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 -#define UV4H_GR1_TLB_MMR_CONTROL_PAGE_SIZE_SHFT 59 -#define UV4H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000001fffUL -#define UV4H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000006000UL -#define UV4H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UV4H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UV4H_GR1_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL -#define UV4H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UV4H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UV4H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL -#define UV4H_GR1_TLB_MMR_CONTROL_PAGE_SIZE_MASK 0xf800000000000000UL - - -union uvh_gr1_tlb_mmr_control_u { - unsigned long v; - struct uvh_gr1_tlb_mmr_control_s { - unsigned long rsvd_0_15:16; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long rsvd_21_29:9; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long rsvd_32_48:17; - unsigned long rsvd_49_51:3; - unsigned long rsvd_52_63:12; - } s; - struct uv1h_gr1_tlb_mmr_control_s { - unsigned long index:12; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_14_15:2; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long rsvd_21_29:9; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long rsvd_32_47:16; - unsigned long mmr_inj_con:1; /* RW */ - unsigned long rsvd_49_51:3; - unsigned long mmr_inj_tlbram:1; /* RW */ - unsigned long rsvd_53:1; - unsigned long mmr_inj_tlbpgsize:1; /* RW */ - unsigned long rsvd_55:1; - unsigned long mmr_inj_tlbrreg:1; /* RW */ - unsigned long rsvd_57_59:3; - unsigned long mmr_inj_tlblruv:1; /* RW */ 
- unsigned long rsvd_61_63:3; - } s1; - struct uvxh_gr1_tlb_mmr_control_s { - unsigned long rsvd_0_15:16; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long rsvd_21_29:9; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long mmr_op_done:1; /* RW */ - unsigned long rsvd_33_47:15; - unsigned long rsvd_48:1; - unsigned long rsvd_49_51:3; - unsigned long rsvd_52_63:12; + /* UVXH common struct */ + struct uvxh_gr1_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ } sx; - struct uv2h_gr1_tlb_mmr_control_s { - unsigned long index:12; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_14_15:2; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long rsvd_21_29:9; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long mmr_op_done:1; /* RW */ - unsigned long rsvd_33_47:15; - unsigned long mmr_inj_con:1; /* RW */ - unsigned long rsvd_49_51:3; - unsigned long mmr_inj_tlbram:1; /* RW */ - unsigned long rsvd_53_63:11; - } s2; - struct uv3h_gr1_tlb_mmr_control_s { - unsigned long index:12; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_14_15:2; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long ecc_sel:1; /* RW */ - unsigned long rsvd_22_29:8; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long mmr_op_done:1; /* RW */ - unsigned long rsvd_33_47:15; - unsigned long undef_48:1; /* Undefined */ - unsigned long rsvd_49_51:3; - unsigned long undef_52:1; /* Undefined */ - unsigned long rsvd_53_63:11; - } s3; - struct uv4h_gr1_tlb_mmr_control_s { - unsigned long index:13; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_15:1; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long ecc_sel:1; /* RW */ - unsigned long rsvd_22_29:8; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long mmr_op_done:1; /* RW */ - unsigned long rsvd_33_47:15; - unsigned long undef_48:1; /* Undefined */ - unsigned long rsvd_49_51:3; - unsigned long rsvd_52_58:7; - unsigned long page_size:5; /* RW */ + + /* UV4 unique struct */ + struct uv4h_gr1_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ } s4; -}; -/* ========================================================================= */ -/* UVH_GR1_TLB_MMR_READ_DATA_HI */ -/* ========================================================================= */ -#define UV1H_GR1_TLB_MMR_READ_DATA_HI 0x8010a0UL -#define UV2H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL -#define UV3H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL -#define UV4H_GR1_TLB_MMR_READ_DATA_HI 0x7010a0UL -#define UVH_GR1_TLB_MMR_READ_DATA_HI ( \ - 
is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_HI : \ - is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_HI : \ - is_uv3_hub() ? UV3H_GR1_TLB_MMR_READ_DATA_HI : \ - /*is_uv4_hub*/ UV4H_GR1_TLB_MMR_READ_DATA_HI) - -#define UVH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 - -#define UV1H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UV1H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 -#define UV1H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 -#define UV1H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 -#define UV1H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL -#define UV1H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL -#define UV1H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL -#define UV1H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL - -#define UVXH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 - -#define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 -#define UV2H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 -#define UV2H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 -#define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL -#define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL -#define UV2H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL -#define UV2H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL - -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 45 -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55 -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL - -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PNID_SHFT 34 -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 49 -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 51 -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 52 -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 53 -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55 -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x00000003ffffffffUL -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PNID_MASK 0x0001fffc00000000UL -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0006000000000000UL -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0008000000000000UL -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0010000000000000UL -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0020000000000000UL -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL - - -union uvh_gr1_tlb_mmr_read_data_hi_u { - unsigned long v; - struct uv1h_gr1_tlb_mmr_read_data_hi_s { - unsigned long pfn:41; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long larger:1; /* RO */ - unsigned long rsvd_45_63:19; - } s1; - struct uv2h_gr1_tlb_mmr_read_data_hi_s { - unsigned long pfn:41; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long larger:1; /* RO */ - unsigned long rsvd_45_63:19; - } s2; - struct uv3h_gr1_tlb_mmr_read_data_hi_s { - unsigned long pfn:41; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long 
larger:1; /* RO */ - unsigned long aa_ext:1; /* RO */ - unsigned long undef_46_54:9; /* Undefined */ - unsigned long way_ecc:9; /* RO */ + /* UV3 unique struct */ + struct uv3h_gr1_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ } s3; - struct uv4h_gr1_tlb_mmr_read_data_hi_s { - unsigned long pfn:34; /* RO */ - unsigned long pnid:15; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long larger:1; /* RO */ - unsigned long aa_ext:1; /* RO */ - unsigned long undef_54:1; /* Undefined */ - unsigned long way_ecc:9; /* RO */ - } s4; -}; -/* ========================================================================= */ -/* UVH_GR1_TLB_MMR_READ_DATA_LO */ -/* ========================================================================= */ -#define UV1H_GR1_TLB_MMR_READ_DATA_LO 0x8010a8UL -#define UV2H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL -#define UV3H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL -#define UV4H_GR1_TLB_MMR_READ_DATA_LO 0x7010a8UL -#define UVH_GR1_TLB_MMR_READ_DATA_LO ( \ - is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_LO : \ - is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_LO : \ - is_uv3_hub() ? UV3H_GR1_TLB_MMR_READ_DATA_LO : \ - /*is_uv4_hub*/ UV4H_GR1_TLB_MMR_READ_DATA_LO) - -#define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UVH_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UVH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UV1H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UV1H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UVXH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UVXH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UV2H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UV2H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UV4H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 
0x0000007fffffffffUL -#define UV4H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - - -union uvh_gr1_tlb_mmr_read_data_lo_u { - unsigned long v; - struct uvh_gr1_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } s; - struct uv1h_gr1_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } s1; - struct uvxh_gr1_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } sx; - struct uv2h_gr1_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ + /* UV2 unique struct */ + struct uv2h_gr1_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ } s2; - struct uv3h_gr1_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } s3; - struct uv4h_gr1_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } s4; }; /* ========================================================================= */ @@ -1759,58 +2661,43 @@ union uvh_gr1_tlb_mmr_read_data_lo_u { /* ========================================================================= */ #define UVH_INT_CMPB 0x22080UL +/* UVH common defines*/ #define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0 #define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL union uvh_int_cmpb_u { unsigned long v; + + /* UVH common struct */ struct uvh_int_cmpb_s { unsigned long real_time_cmpb:56; /* RW */ unsigned long rsvd_56_63:8; } s; -}; - -/* ========================================================================= */ -/* UVH_INT_CMPC */ -/* ========================================================================= */ -#define UVH_INT_CMPC 0x22100UL - - -#define UV1H_INT_CMPC_REAL_TIME_CMPC_SHFT 0 -#define UV1H_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL -#define UVXH_INT_CMPC_REAL_TIME_CMP_2_SHFT 0 -#define UVXH_INT_CMPC_REAL_TIME_CMP_2_MASK 0x00ffffffffffffffUL - - -union uvh_int_cmpc_u { - unsigned long v; - struct uvh_int_cmpc_s { - unsigned long real_time_cmpc:56; /* RW */ + /* UV5 unique struct */ + struct uv5h_int_cmpb_s { + unsigned long real_time_cmpb:56; /* RW */ unsigned long rsvd_56_63:8; - } s; -}; - -/* ========================================================================= */ -/* UVH_INT_CMPD */ -/* ========================================================================= */ -#define UVH_INT_CMPD 0x22180UL - - -#define UV1H_INT_CMPD_REAL_TIME_CMPD_SHFT 0 -#define UV1H_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL + } s5; -#define UVXH_INT_CMPD_REAL_TIME_CMP_3_SHFT 0 -#define UVXH_INT_CMPD_REAL_TIME_CMP_3_MASK 0x00ffffffffffffffUL + /* UV4 unique struct */ + struct uv4h_int_cmpb_s { + unsigned long real_time_cmpb:56; /* RW */ + unsigned long rsvd_56_63:8; + } s4; + /* UV3 unique struct */ + struct uv3h_int_cmpb_s { + unsigned long real_time_cmpb:56; /* RW */ + unsigned long rsvd_56_63:8; + } s3; -union uvh_int_cmpd_u { - unsigned long v; - struct 
uvh_int_cmpd_s { - unsigned long real_time_cmpd:56; /* RW */ + /* UV2 unique struct */ + struct uv2h_int_cmpb_s { + unsigned long real_time_cmpb:56; /* RW */ unsigned long rsvd_56_63:8; - } s; + } s2; }; /* ========================================================================= */ @@ -1818,30 +2705,23 @@ union uvh_int_cmpd_u { /* ========================================================================= */ #define UVH_IPI_INT 0x60500UL -#define UV1H_IPI_INT_32 0x348 -#define UV2H_IPI_INT_32 0x348 -#define UV3H_IPI_INT_32 0x348 -#define UV4H_IPI_INT_32 0x268 -#define UVH_IPI_INT_32 ( \ - is_uv1_hub() ? UV1H_IPI_INT_32 : \ - is_uv2_hub() ? UV2H_IPI_INT_32 : \ - is_uv3_hub() ? UV3H_IPI_INT_32 : \ - /*is_uv4_hub*/ UV4H_IPI_INT_32) - +/* UVH common defines*/ #define UVH_IPI_INT_VECTOR_SHFT 0 -#define UVH_IPI_INT_DELIVERY_MODE_SHFT 8 -#define UVH_IPI_INT_DESTMODE_SHFT 11 -#define UVH_IPI_INT_APIC_ID_SHFT 16 -#define UVH_IPI_INT_SEND_SHFT 63 #define UVH_IPI_INT_VECTOR_MASK 0x00000000000000ffUL +#define UVH_IPI_INT_DELIVERY_MODE_SHFT 8 #define UVH_IPI_INT_DELIVERY_MODE_MASK 0x0000000000000700UL +#define UVH_IPI_INT_DESTMODE_SHFT 11 #define UVH_IPI_INT_DESTMODE_MASK 0x0000000000000800UL +#define UVH_IPI_INT_APIC_ID_SHFT 16 #define UVH_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL +#define UVH_IPI_INT_SEND_SHFT 63 #define UVH_IPI_INT_SEND_MASK 0x8000000000000000UL union uvh_ipi_int_u { unsigned long v; + + /* UVH common struct */ struct uvh_ipi_int_s { unsigned long vector_:8; /* RW */ unsigned long delivery_mode:3; /* RW */ @@ -1851,1243 +2731,733 @@ union uvh_ipi_int_u { unsigned long rsvd_48_62:15; unsigned long send:1; /* WP */ } s; + + /* UV5 unique struct */ + struct uv5h_ipi_int_s { + unsigned long vector_:8; /* RW */ + unsigned long delivery_mode:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long rsvd_12_15:4; + unsigned long apic_id:32; /* RW */ + unsigned long rsvd_48_62:15; + unsigned long send:1; /* WP */ + } s5; + + /* UV4 unique struct */ + struct uv4h_ipi_int_s { + unsigned long vector_:8; /* RW */ + unsigned long delivery_mode:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long rsvd_12_15:4; + unsigned long apic_id:32; /* RW */ + unsigned long rsvd_48_62:15; + unsigned long send:1; /* WP */ + } s4; + + /* UV3 unique struct */ + struct uv3h_ipi_int_s { + unsigned long vector_:8; /* RW */ + unsigned long delivery_mode:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long rsvd_12_15:4; + unsigned long apic_id:32; /* RW */ + unsigned long rsvd_48_62:15; + unsigned long send:1; /* WP */ + } s3; + + /* UV2 unique struct */ + struct uv2h_ipi_int_s { + unsigned long vector_:8; /* RW */ + unsigned long delivery_mode:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long rsvd_12_15:4; + unsigned long apic_id:32; /* RW */ + unsigned long rsvd_48_62:15; + unsigned long send:1; /* WP */ + } s2; }; /* ========================================================================= */ -/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */ +/* UVH_NODE_ID */ /* ========================================================================= */ -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL -#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST") -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST ( \ - is_uv1_hub() ? UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST : \ - is_uv2_hub() ? 
UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST : \ - is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST : \ - /*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST) -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0 +#define UVH_NODE_ID 0x0UL +/* UVH common defines*/ +#define UVH_NODE_ID_FORCE1_SHFT 0 +#define UVH_NODE_ID_FORCE1_MASK 0x0000000000000001UL +#define UVH_NODE_ID_MANUFACTURER_SHFT 1 +#define UVH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL +#define UVH_NODE_ID_PART_NUMBER_SHFT 12 +#define UVH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL +#define UVH_NODE_ID_REVISION_SHFT 28 +#define UVH_NODE_ID_REVISION_MASK 0x00000000f0000000UL +#define UVH_NODE_ID_NODE_ID_SHFT 32 +#define UVH_NODE_ID_NI_PORT_SHFT 57 -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL +/* UVXH common defines */ +#define UVXH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL +#define UVXH_NODE_ID_NODES_PER_BIT_SHFT 50 +#define UVXH_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL +#define UVXH_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL +/* UVYH common defines */ +#define UVYH_NODE_ID_NODE_ID_MASK 0x0000007f00000000UL +#define UVYH_NODE_ID_NI_PORT_MASK 0x7e00000000000000UL -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL +/* UV4 unique defines */ +#define UV4H_NODE_ID_ROUTER_SELECT_SHFT 48 +#define UV4H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL +#define UV4H_NODE_ID_RESERVED_2_SHFT 49 +#define UV4H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL +/* UV3 unique defines */ +#define UV3H_NODE_ID_ROUTER_SELECT_SHFT 48 +#define UV3H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL +#define UV3H_NODE_ID_RESERVED_2_SHFT 49 +#define UV3H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL -union uvh_lb_bau_intd_payload_queue_first_u { +union uvh_node_id_u { unsigned long v; - struct uv1h_lb_bau_intd_payload_queue_first_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_48:6; - unsigned long node_id:14; /* RW */ - unsigned long rsvd_63:1; - } s1; - struct uv2h_lb_bau_intd_payload_queue_first_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_48:6; - unsigned long node_id:14; /* RW */ + + /* UVH common struct */ + struct uvh_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long rsvd_32_63:32; + } s; + + /* UVXH common struct */ + struct uvxh_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:15; /* RW */ + unsigned long rsvd_47_49:3; + unsigned long nodes_per_bit:7; /* RO */ + unsigned long ni_port:5; /* RO */ + unsigned long rsvd_62_63:2; + } sx; + + /* UVYH 
common struct */ + struct uvyh_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:7; /* RW */ + unsigned long rsvd_39_56:18; + unsigned long ni_port:6; /* RO */ unsigned long rsvd_63:1; - } s2; - struct uv3h_lb_bau_intd_payload_queue_first_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_48:6; - unsigned long node_id:14; /* RW */ + } sy; + + /* UV5 unique struct */ + struct uv5h_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:7; /* RW */ + unsigned long rsvd_39_56:18; + unsigned long ni_port:6; /* RO */ unsigned long rsvd_63:1; + } s5; + + /* UV4 unique struct */ + struct uv4h_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:15; /* RW */ + unsigned long rsvd_47:1; + unsigned long router_select:1; /* RO */ + unsigned long rsvd_49:1; + unsigned long nodes_per_bit:7; /* RO */ + unsigned long ni_port:5; /* RO */ + unsigned long rsvd_62_63:2; + } s4; + + /* UV3 unique struct */ + struct uv3h_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:15; /* RW */ + unsigned long rsvd_47:1; + unsigned long router_select:1; /* RO */ + unsigned long rsvd_49:1; + unsigned long nodes_per_bit:7; /* RO */ + unsigned long ni_port:5; /* RO */ + unsigned long rsvd_62_63:2; } s3; + + /* UV2 unique struct */ + struct uv2h_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:15; /* RW */ + unsigned long rsvd_47_49:3; + unsigned long nodes_per_bit:7; /* RO */ + unsigned long ni_port:5; /* RO */ + unsigned long rsvd_62_63:2; + } s2; }; /* ========================================================================= */ -/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */ +/* UVH_NODE_PRESENT_0 */ /* ========================================================================= */ -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL -#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST") -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST ( \ - is_uv1_hub() ? UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST : \ - is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST : \ - is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST : \ - /*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST) -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8 +#define UVH_NODE_PRESENT_0 ( \ + is_uv(UV5) ? 
0x1400UL : \ + 0) -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL +/* UVYH common defines */ +#define UVYH_NODE_PRESENT_0_NODES_SHFT 0 +#define UVYH_NODE_PRESENT_0_NODES_MASK 0xffffffffffffffffUL -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL +union uvh_node_present_0_u { + unsigned long v; -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL + /* UVH common struct */ + struct uvh_node_present_0_s { + unsigned long nodes:64; /* RW */ + } s; + /* UVYH common struct */ + struct uvyh_node_present_0_s { + unsigned long nodes:64; /* RW */ + } sy; -union uvh_lb_bau_intd_payload_queue_last_u { - unsigned long v; - struct uv1h_lb_bau_intd_payload_queue_last_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_63:21; - } s1; - struct uv2h_lb_bau_intd_payload_queue_last_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_63:21; - } s2; - struct uv3h_lb_bau_intd_payload_queue_last_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_63:21; - } s3; + /* UV5 unique struct */ + struct uv5h_node_present_0_s { + unsigned long nodes:64; /* RW */ + } s5; }; /* ========================================================================= */ -/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */ +/* UVH_NODE_PRESENT_1 */ /* ========================================================================= */ -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL -#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL") -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL ( \ - is_uv1_hub() ? UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL : \ - is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL : \ - is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL : \ - /*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL) -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0 +#define UVH_NODE_PRESENT_1 ( \ + is_uv(UV5) ? 
0x1408UL : \ + 0) -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL +/* UVYH common defines */ +#define UVYH_NODE_PRESENT_1_NODES_SHFT 0 +#define UVYH_NODE_PRESENT_1_NODES_MASK 0xffffffffffffffffUL -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL +union uvh_node_present_1_u { + unsigned long v; -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL + /* UVH common struct */ + struct uvh_node_present_1_s { + unsigned long nodes:64; /* RW */ + } s; + /* UVYH common struct */ + struct uvyh_node_present_1_s { + unsigned long nodes:64; /* RW */ + } sy; -union uvh_lb_bau_intd_payload_queue_tail_u { - unsigned long v; - struct uv1h_lb_bau_intd_payload_queue_tail_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_63:21; - } s1; - struct uv2h_lb_bau_intd_payload_queue_tail_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_63:21; - } s2; - struct uv3h_lb_bau_intd_payload_queue_tail_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_63:21; - } s3; + /* UV5 unique struct */ + struct uv5h_node_present_1_s { + unsigned long nodes:64; /* RW */ + } s5; }; /* ========================================================================= */ -/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */ +/* UVH_NODE_PRESENT_TABLE */ /* ========================================================================= */ -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL -#define UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE uv_undefined("UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE") -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE ( \ - is_uv1_hub() ? UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE : \ - is_uv2_hub() ? UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE : \ - is_uv3_hub() ? 
UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE : \ - /*is_uv4_hub*/ UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE) -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68 - - -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL - - -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL - -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL -#define 
UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL - - -union uvh_lb_bau_intd_software_acknowledge_u { +#define UVH_NODE_PRESENT_TABLE ( \ + is_uv(UV4) ? 0x1400UL : \ + is_uv(UV3) ? 0x1400UL : \ + is_uv(UV2) ? 0x1400UL : \ + 0) + +#define UVH_NODE_PRESENT_TABLE_DEPTH ( \ + is_uv(UV4) ? 4 : \ + is_uv(UV3) ? 16 : \ + is_uv(UV2) ? 16 : \ + 0) + + +/* UVXH common defines */ +#define UVXH_NODE_PRESENT_TABLE_NODES_SHFT 0 +#define UVXH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL + + +union uvh_node_present_table_u { unsigned long v; - struct uv1h_lb_bau_intd_software_acknowledge_s { - unsigned long pending_0:1; /* RW, W1C */ - unsigned long pending_1:1; /* RW, W1C */ - unsigned long pending_2:1; /* RW, W1C */ - unsigned long pending_3:1; /* RW, W1C */ - unsigned long pending_4:1; /* RW, W1C */ - unsigned long pending_5:1; /* RW, W1C */ - unsigned long pending_6:1; /* RW, W1C */ - unsigned long pending_7:1; /* RW, W1C */ - unsigned long timeout_0:1; /* RW, W1C */ - unsigned long timeout_1:1; /* RW, W1C */ - unsigned long timeout_2:1; /* RW, W1C */ - unsigned long timeout_3:1; /* RW, W1C */ - unsigned long timeout_4:1; /* RW, W1C */ - unsigned long timeout_5:1; /* RW, W1C */ - unsigned long timeout_6:1; /* RW, W1C */ - unsigned long timeout_7:1; /* RW, W1C */ - unsigned long rsvd_16_63:48; - } s1; - struct uv2h_lb_bau_intd_software_acknowledge_s { - unsigned long pending_0:1; /* RW */ - unsigned long pending_1:1; /* RW */ - unsigned long pending_2:1; /* RW */ - unsigned long pending_3:1; /* RW */ - unsigned long pending_4:1; /* RW */ - unsigned long pending_5:1; /* RW */ - unsigned long pending_6:1; /* RW */ - unsigned long pending_7:1; /* RW */ - unsigned long timeout_0:1; /* RW */ - unsigned long timeout_1:1; /* RW */ - unsigned long timeout_2:1; /* RW */ - unsigned long timeout_3:1; /* RW */ - unsigned long timeout_4:1; /* RW */ - unsigned long timeout_5:1; /* RW */ - unsigned long timeout_6:1; /* RW */ - unsigned long timeout_7:1; /* RW */ - unsigned long rsvd_16_63:48; - } s2; - struct uv3h_lb_bau_intd_software_acknowledge_s { - unsigned long pending_0:1; /* RW */ - unsigned long pending_1:1; /* RW */ - unsigned long pending_2:1; /* RW */ - unsigned long pending_3:1; /* RW */ - unsigned long pending_4:1; /* RW */ - unsigned long pending_5:1; /* RW */ - unsigned long pending_6:1; /* RW */ - unsigned long pending_7:1; /* RW */ - unsigned long timeout_0:1; /* RW */ - unsigned long timeout_1:1; /* RW */ - unsigned long timeout_2:1; /* RW */ - unsigned long timeout_3:1; /* RW */ - unsigned long timeout_4:1; /* RW */ - unsigned long timeout_5:1; /* RW */ - unsigned long timeout_6:1; /* RW */ - unsigned long timeout_7:1; /* RW */ - unsigned long rsvd_16_63:48; + + /* UVH common struct */ + struct uvh_node_present_table_s { + unsigned long nodes:64; /* RW */ + } s; + + /* UVXH common struct */ + struct uvxh_node_present_table_s { + unsigned long nodes:64; /* RW */ + } sx; + + /* UV4 unique struct */ + struct uv4h_node_present_table_s { + unsigned long nodes:64; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_node_present_table_s { + unsigned long nodes:64; /* RW */ } s3; + + /* UV2 unique struct */ + struct uv2h_node_present_table_s { + unsigned long nodes:64; /* RW */ + } s2; }; /* ========================================================================= */ -/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */ +/* UVH_RH10_GAM_ADDR_MAP_CONFIG */ /* 
========================================================================= */ -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL -#define UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS uv_undefined("UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS") -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS ( \ - is_uv1_hub() ? UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS : \ - is_uv2_hub() ? UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS : \ - is_uv3_hub() ? UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS : \ - /*is_uv4_hub*/ UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS) -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70 +#define UVH_RH10_GAM_ADDR_MAP_CONFIG ( \ + is_uv(UV5) ? 0x470000UL : \ + 0) + + +/* UVYH common defines */ +#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_N_SKT_SHFT 6 +#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_N_SKT_MASK 0x00000000000001c0UL +#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_LS_ENABLE_SHFT 12 +#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_LS_ENABLE_MASK 0x0000000000001000UL +#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_MK_TME_KEYID_BITS_SHFT 16 +#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_MK_TME_KEYID_BITS_MASK 0x00000000000f0000UL + + +union uvh_rh10_gam_addr_map_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh10_gam_addr_map_config_s { + unsigned long undef_0_5:6; /* Undefined */ + unsigned long n_skt:3; /* RW */ + unsigned long undef_9_11:3; /* Undefined */ + unsigned long ls_enable:1; /* RW */ + unsigned long undef_13_15:3; /* Undefined */ + unsigned long mk_tme_keyid_bits:4; /* RW */ + unsigned long rsvd_20_63:44; + } s; + /* UVYH common struct */ + struct uvyh_rh10_gam_addr_map_config_s { + unsigned long undef_0_5:6; /* Undefined */ + unsigned long n_skt:3; /* RW */ + unsigned long undef_9_11:3; /* Undefined */ + unsigned long ls_enable:1; /* RW */ + unsigned long undef_13_15:3; /* Undefined */ + unsigned long mk_tme_keyid_bits:4; /* RW */ + unsigned long rsvd_20_63:44; + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_addr_map_config_s { + unsigned long undef_0_5:6; /* Undefined */ + unsigned long n_skt:3; /* RW */ + unsigned long undef_9_11:3; /* Undefined */ + unsigned long ls_enable:1; /* RW */ + unsigned long undef_13_15:3; /* Undefined */ + unsigned long mk_tme_keyid_bits:4; /* RW */ + } s5; +}; /* ========================================================================= */ -/* UVH_LB_BAU_MISC_CONTROL */ +/* UVH_RH10_GAM_GRU_OVERLAY_CONFIG */ /* ========================================================================= */ -#define UV1H_LB_BAU_MISC_CONTROL 0x320170UL -#define UV2H_LB_BAU_MISC_CONTROL 0x320170UL -#define UV3H_LB_BAU_MISC_CONTROL 0x320170UL -#define UV4H_LB_BAU_MISC_CONTROL 0xc8170UL -#define UVH_LB_BAU_MISC_CONTROL ( \ - is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL : \ - is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL : \ - is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL : \ - /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL) - -#define UV1H_LB_BAU_MISC_CONTROL_32 0xa10 -#define UV2H_LB_BAU_MISC_CONTROL_32 0xa10 -#define UV3H_LB_BAU_MISC_CONTROL_32 0xa10 -#define UV4H_LB_BAU_MISC_CONTROL_32 0xa18 -#define UVH_LB_BAU_MISC_CONTROL_32 ( \ - is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_32 : \ - is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_32 : \ - is_uv3_hub() ? 
UV3H_LB_BAU_MISC_CONTROL_32 : \ - /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_32) - -#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 -#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 -#define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 -#define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 -#define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 -#define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 -#define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 -#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 -#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 -#define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 -#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 -#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 -#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 -#define UVH_LB_BAU_MISC_CONTROL_FUN_SHFT 48 -#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL -#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL -#define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL -#define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL -#define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL -#define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL -#define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL -#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL -#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL -#define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL -#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL -#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL -#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL -#define UVH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL - -#define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 -#define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 -#define UV1H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 -#define UV1H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 -#define UV1H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 -#define UV1H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 -#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 -#define UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 -#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 -#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 -#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 -#define UV1H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 -#define UV1H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 -#define UV1H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 -#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 -#define UV1H_LB_BAU_MISC_CONTROL_FUN_SHFT 48 -#define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL -#define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL -#define UV1H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL -#define UV1H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL -#define UV1H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL -#define UV1H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 
0x0000000000004000UL -#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL -#define UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL -#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL -#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL -#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL -#define UV1H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL -#define UV1H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL -#define UV1H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL -#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL -#define UV1H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL - -#define UVXH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 -#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 -#define UVXH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 -#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 -#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 -#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 -#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 -#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 -#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 -#define UVXH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 -#define UVXH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29 -#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30 -#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31 -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32 -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33 -#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34 -#define UVXH_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35 -#define UVXH_LB_BAU_MISC_CONTROL_FUN_SHFT 48 -#define UVXH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL -#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL -#define UVXH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL -#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL -#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL -#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL -#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL -#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL -#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL -#define UVXH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL -#define UVXH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL -#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL -#define 
UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL -#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL -#define UVXH_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL -#define UVXH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL - -#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 -#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 -#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 -#define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 -#define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 -#define UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 -#define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 -#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 -#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 -#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 -#define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 -#define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29 -#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30 -#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31 -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32 -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33 -#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34 -#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35 -#define UV2H_LB_BAU_MISC_CONTROL_FUN_SHFT 48 -#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL -#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL -#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL -#define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL -#define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL -#define UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL -#define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL -#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL -#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL -#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL -#define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL -#define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL -#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL -#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL -#define 
UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL -#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL -#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL -#define UV2H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL - -#define UV3H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 -#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 -#define UV3H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 -#define UV3H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 -#define UV3H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 -#define UV3H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 -#define UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 -#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 -#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 -#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 -#define UV3H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 -#define UV3H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29 -#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30 -#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31 -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32 -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33 -#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34 -#define UV3H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35 -#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_SHFT 36 -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_SHFT 37 -#define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_SHFT 38 -#define UV3H_LB_BAU_MISC_CONTROL_FUN_SHFT 48 -#define UV3H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL -#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL -#define UV3H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL -#define UV3H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL -#define UV3H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL -#define UV3H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL -#define UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL -#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL -#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL -#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL -#define UV3H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL -#define UV3H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL -#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL 
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL -#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL -#define UV3H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL -#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_MASK 0x0000001000000000UL -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_MASK 0x0000002000000000UL -#define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL -#define UV3H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL - -#define UV4H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 -#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 -#define UV4H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 -#define UV4H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 -#define UV4H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 -#define UV4H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 -#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_15_19_SHFT 15 -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 -#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 -#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 -#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 -#define UV4H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 -#define UV4H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29 -#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30 -#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31 -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32 -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33 -#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34 -#define UV4H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35 -#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_SHFT 36 -#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_37_SHFT 37 -#define UV4H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_SHFT 38 -#define UV4H_LB_BAU_MISC_CONTROL_ADDRESS_INTERLEAVE_SELECT_SHFT 46 -#define UV4H_LB_BAU_MISC_CONTROL_FUN_SHFT 48 -#define UV4H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL -#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL -#define UV4H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL -#define UV4H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL -#define UV4H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL -#define UV4H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL -#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_15_19_MASK 0x00000000000f8000UL -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL -#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL -#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL -#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL -#define UV4H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL -#define UV4H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL -#define 
UV4H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL -#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL -#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL -#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL -#define UV4H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL -#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_MASK 0x0000001000000000UL -#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_37_MASK 0x0000002000000000UL -#define UV4H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL -#define UV4H_LB_BAU_MISC_CONTROL_ADDRESS_INTERLEAVE_SELECT_MASK 0x0000400000000000UL -#define UV4H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL - -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK \ - uv_undefined("UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK") -#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK ( \ - is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \ - is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \ - is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \ - /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK) -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT \ - uv_undefined("UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT") -#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT ( \ - is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \ - is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \ - is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \ - /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT) -#define UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK \ - uv_undefined("UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK") -#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK ( \ - is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \ - is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \ - is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \ - /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK) -#define UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT \ - uv_undefined("UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT") -#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT ( \ - is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \ - is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \ - is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \ - /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT) - -union uvh_lb_bau_misc_control_u { +#define UVH_RH10_GAM_GRU_OVERLAY_CONFIG ( \ + is_uv(UV5) ? 
0x4700b0UL : \ + 0) + + +/* UVYH common defines */ +#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 25 +#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x000ffffffe000000UL +#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_N_GRU_SHFT 52 +#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_N_GRU_MASK 0x0070000000000000UL +#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_MASK ( \ + is_uv(UV5) ? 0x000ffffffe000000UL : \ + 0) +#define UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT ( \ + is_uv(UV5) ? 25 : \ + -1) + +union uvh_rh10_gam_gru_overlay_config_u { unsigned long v; - struct uvh_lb_bau_misc_control_s { - unsigned long rejection_delay:8; /* RW */ - unsigned long apic_mode:1; /* RW */ - unsigned long force_broadcast:1; /* RW */ - unsigned long force_lock_nop:1; /* RW */ - unsigned long qpi_agent_presence_vector:3; /* RW */ - unsigned long descriptor_fetch_mode:1; /* RW */ - unsigned long rsvd_15_19:5; - unsigned long enable_dual_mapping_mode:1; /* RW */ - unsigned long vga_io_port_decode_enable:1; /* RW */ - unsigned long vga_io_port_16_bit_decode:1; /* RW */ - unsigned long suppress_dest_registration:1; /* RW */ - unsigned long programmed_initial_priority:3; /* RW */ - unsigned long use_incoming_priority:1; /* RW */ - unsigned long enable_programmed_initial_priority:1;/* RW */ - unsigned long rsvd_29_47:19; - unsigned long fun:16; /* RW */ + + /* UVH common struct */ + struct uvh_rh10_gam_gru_overlay_config_s { + unsigned long undef_0_24:25; /* Undefined */ + unsigned long base:27; /* RW */ + unsigned long n_gru:3; /* RW */ + unsigned long undef_55_62:8; /* Undefined */ + unsigned long enable:1; /* RW */ } s; - struct uv1h_lb_bau_misc_control_s { - unsigned long rejection_delay:8; /* RW */ - unsigned long apic_mode:1; /* RW */ - unsigned long force_broadcast:1; /* RW */ - unsigned long force_lock_nop:1; /* RW */ - unsigned long qpi_agent_presence_vector:3; /* RW */ - unsigned long descriptor_fetch_mode:1; /* RW */ - unsigned long enable_intd_soft_ack_mode:1; /* RW */ - unsigned long intd_soft_ack_timeout_period:4; /* RW */ - unsigned long enable_dual_mapping_mode:1; /* RW */ - unsigned long vga_io_port_decode_enable:1; /* RW */ - unsigned long vga_io_port_16_bit_decode:1; /* RW */ - unsigned long suppress_dest_registration:1; /* RW */ - unsigned long programmed_initial_priority:3; /* RW */ - unsigned long use_incoming_priority:1; /* RW */ - unsigned long enable_programmed_initial_priority:1;/* RW */ - unsigned long rsvd_29_47:19; - unsigned long fun:16; /* RW */ - } s1; - struct uvxh_lb_bau_misc_control_s { - unsigned long rejection_delay:8; /* RW */ - unsigned long apic_mode:1; /* RW */ - unsigned long force_broadcast:1; /* RW */ - unsigned long force_lock_nop:1; /* RW */ - unsigned long qpi_agent_presence_vector:3; /* RW */ - unsigned long descriptor_fetch_mode:1; /* RW */ - unsigned long rsvd_15_19:5; - unsigned long enable_dual_mapping_mode:1; /* RW */ - unsigned long vga_io_port_decode_enable:1; /* RW */ - unsigned long vga_io_port_16_bit_decode:1; /* RW */ - unsigned long suppress_dest_registration:1; /* RW */ - unsigned long programmed_initial_priority:3; /* RW */ - unsigned long use_incoming_priority:1; /* RW */ - unsigned long enable_programmed_initial_priority:1;/* RW */ - unsigned long enable_automatic_apic_mode_selection:1;/* RW */ - unsigned long apic_mode_status:1; /* RO */ - unsigned long suppress_interrupts_to_self:1; /* RW */ - unsigned long 
enable_lock_based_system_flush:1;/* RW */ - unsigned long enable_extended_sb_status:1; /* RW */ - unsigned long suppress_int_prio_udt_to_self:1;/* RW */ - unsigned long use_legacy_descriptor_formats:1;/* RW */ - unsigned long rsvd_36_47:12; - unsigned long fun:16; /* RW */ - } sx; - struct uv2h_lb_bau_misc_control_s { - unsigned long rejection_delay:8; /* RW */ - unsigned long apic_mode:1; /* RW */ - unsigned long force_broadcast:1; /* RW */ - unsigned long force_lock_nop:1; /* RW */ - unsigned long qpi_agent_presence_vector:3; /* RW */ - unsigned long descriptor_fetch_mode:1; /* RW */ - unsigned long enable_intd_soft_ack_mode:1; /* RW */ - unsigned long intd_soft_ack_timeout_period:4; /* RW */ - unsigned long enable_dual_mapping_mode:1; /* RW */ - unsigned long vga_io_port_decode_enable:1; /* RW */ - unsigned long vga_io_port_16_bit_decode:1; /* RW */ - unsigned long suppress_dest_registration:1; /* RW */ - unsigned long programmed_initial_priority:3; /* RW */ - unsigned long use_incoming_priority:1; /* RW */ - unsigned long enable_programmed_initial_priority:1;/* RW */ - unsigned long enable_automatic_apic_mode_selection:1;/* RW */ - unsigned long apic_mode_status:1; /* RO */ - unsigned long suppress_interrupts_to_self:1; /* RW */ - unsigned long enable_lock_based_system_flush:1;/* RW */ - unsigned long enable_extended_sb_status:1; /* RW */ - unsigned long suppress_int_prio_udt_to_self:1;/* RW */ - unsigned long use_legacy_descriptor_formats:1;/* RW */ - unsigned long rsvd_36_47:12; - unsigned long fun:16; /* RW */ - } s2; - struct uv3h_lb_bau_misc_control_s { - unsigned long rejection_delay:8; /* RW */ - unsigned long apic_mode:1; /* RW */ - unsigned long force_broadcast:1; /* RW */ - unsigned long force_lock_nop:1; /* RW */ - unsigned long qpi_agent_presence_vector:3; /* RW */ - unsigned long descriptor_fetch_mode:1; /* RW */ - unsigned long enable_intd_soft_ack_mode:1; /* RW */ - unsigned long intd_soft_ack_timeout_period:4; /* RW */ - unsigned long enable_dual_mapping_mode:1; /* RW */ - unsigned long vga_io_port_decode_enable:1; /* RW */ - unsigned long vga_io_port_16_bit_decode:1; /* RW */ - unsigned long suppress_dest_registration:1; /* RW */ - unsigned long programmed_initial_priority:3; /* RW */ - unsigned long use_incoming_priority:1; /* RW */ - unsigned long enable_programmed_initial_priority:1;/* RW */ - unsigned long enable_automatic_apic_mode_selection:1;/* RW */ - unsigned long apic_mode_status:1; /* RO */ - unsigned long suppress_interrupts_to_self:1; /* RW */ - unsigned long enable_lock_based_system_flush:1;/* RW */ - unsigned long enable_extended_sb_status:1; /* RW */ - unsigned long suppress_int_prio_udt_to_self:1;/* RW */ - unsigned long use_legacy_descriptor_formats:1;/* RW */ - unsigned long suppress_quiesce_msgs_to_qpi:1; /* RW */ - unsigned long enable_intd_prefetch_hint:1; /* RW */ - unsigned long thread_kill_timebase:8; /* RW */ - unsigned long rsvd_46_47:2; - unsigned long fun:16; /* RW */ - } s3; - struct uv4h_lb_bau_misc_control_s { - unsigned long rejection_delay:8; /* RW */ - unsigned long apic_mode:1; /* RW */ - unsigned long force_broadcast:1; /* RW */ - unsigned long force_lock_nop:1; /* RW */ - unsigned long qpi_agent_presence_vector:3; /* RW */ - unsigned long descriptor_fetch_mode:1; /* RW */ - unsigned long rsvd_15_19:5; - unsigned long enable_dual_mapping_mode:1; /* RW */ - unsigned long vga_io_port_decode_enable:1; /* RW */ - unsigned long vga_io_port_16_bit_decode:1; /* RW */ - unsigned long suppress_dest_registration:1; /* RW */ - unsigned long 
programmed_initial_priority:3; /* RW */ - unsigned long use_incoming_priority:1; /* RW */ - unsigned long enable_programmed_initial_priority:1;/* RW */ - unsigned long enable_automatic_apic_mode_selection:1;/* RW */ - unsigned long apic_mode_status:1; /* RO */ - unsigned long suppress_interrupts_to_self:1; /* RW */ - unsigned long enable_lock_based_system_flush:1;/* RW */ - unsigned long enable_extended_sb_status:1; /* RW */ - unsigned long suppress_int_prio_udt_to_self:1;/* RW */ - unsigned long use_legacy_descriptor_formats:1;/* RW */ - unsigned long suppress_quiesce_msgs_to_qpi:1; /* RW */ - unsigned long rsvd_37:1; - unsigned long thread_kill_timebase:8; /* RW */ - unsigned long address_interleave_select:1; /* RW */ - unsigned long rsvd_47:1; - unsigned long fun:16; /* RW */ - } s4; + + /* UVYH common struct */ + struct uvyh_rh10_gam_gru_overlay_config_s { + unsigned long undef_0_24:25; /* Undefined */ + unsigned long base:27; /* RW */ + unsigned long n_gru:3; /* RW */ + unsigned long undef_55_62:8; /* Undefined */ + unsigned long enable:1; /* RW */ + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_gru_overlay_config_s { + unsigned long undef_0_24:25; /* Undefined */ + unsigned long base:27; /* RW */ + unsigned long n_gru:3; /* RW */ + unsigned long undef_55_62:8; /* Undefined */ + unsigned long enable:1; /* RW */ + } s5; }; /* ========================================================================= */ -/* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ +/* UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0 */ /* ========================================================================= */ -#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL -#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL -#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL -#define UV4H_LB_BAU_SB_ACTIVATION_CONTROL 0xc8020UL -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_CONTROL : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_CONTROL : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_CONTROL : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_CONTROL) - -#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 -#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 -#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 -#define UV4H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9c8 -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_CONTROL_32 : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_CONTROL_32 : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_CONTROL_32 : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_CONTROL_32) - -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0 -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62 -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_SHFT 63 -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK 0x4000000000000000UL -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK 0x8000000000000000UL - - -union uvh_lb_bau_sb_activation_control_u { +#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0 ( \ + is_uv(UV5) ? 
0x473000UL : \ + 0) + + +/* UVYH common defines */ +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26 +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x000ffffffc000000UL +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 52 +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x03f0000000000000UL +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63 +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK ( \ + is_uv(UV5) ? 0x000ffffffc000000UL : \ + 0) +#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT ( \ + is_uv(UV5) ? 26 : \ + -1) + +union uvh_rh10_gam_mmioh_overlay_config0_u { unsigned long v; - struct uvh_lb_bau_sb_activation_control_s { - unsigned long index:6; /* RW */ - unsigned long rsvd_6_61:56; - unsigned long push:1; /* WP */ - unsigned long init:1; /* WP */ + + /* UVH common struct */ + struct uvh_rh10_gam_mmioh_overlay_config0_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ } s; + + /* UVYH common struct */ + struct uvyh_rh10_gam_mmioh_overlay_config0_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_mmioh_overlay_config0_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } s5; }; /* ========================================================================= */ -/* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */ +/* UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1 */ /* ========================================================================= */ -#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL -#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_0 0xc8030UL -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_0 : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_0 : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_0 : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_0) - -#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 -#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9d0 -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_0_32 : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_32 : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_32 : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_0_32) - -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL - - -union uvh_lb_bau_sb_activation_status_0_u { +#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1 ( \ + is_uv(UV5) ? 
0x474000UL : \ + 0) + + +/* UVYH common defines */ +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26 +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x000ffffffc000000UL +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 52 +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x03f0000000000000UL +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63 +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK ( \ + is_uv(UV5) ? 0x000ffffffc000000UL : \ + 0) +#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT ( \ + is_uv(UV5) ? 26 : \ + -1) + +union uvh_rh10_gam_mmioh_overlay_config1_u { unsigned long v; - struct uvh_lb_bau_sb_activation_status_0_s { - unsigned long status:64; /* RW */ + + /* UVH common struct */ + struct uvh_rh10_gam_mmioh_overlay_config1_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ } s; + + /* UVYH common struct */ + struct uvyh_rh10_gam_mmioh_overlay_config1_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_mmioh_overlay_config1_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } s5; }; /* ========================================================================= */ -/* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */ +/* UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0 */ /* ========================================================================= */ -#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL -#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_1 0xc8040UL -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_1 : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_1 : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_1 : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_1) - -#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 -#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9d8 -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_1_32 : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_32 : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_32 : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_1_32) - -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL - - -union uvh_lb_bau_sb_activation_status_1_u { +#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0 ( \ + is_uv(UV5) ? 0x473800UL : \ + 0) + +#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH ( \ + is_uv(UV5) ? 
128 : \ + 0) + + +/* UVYH common defines */ +#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0 +#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x000000000000007fUL + + +union uvh_rh10_gam_mmioh_redirect_config0_u { unsigned long v; - struct uvh_lb_bau_sb_activation_status_1_s { - unsigned long status:64; /* RW */ + + /* UVH common struct */ + struct uvh_rh10_gam_mmioh_redirect_config0_s { + unsigned long nasid:7; /* RW */ + unsigned long rsvd_7_63:57; } s; + + /* UVYH common struct */ + struct uvyh_rh10_gam_mmioh_redirect_config0_s { + unsigned long nasid:7; /* RW */ + unsigned long rsvd_7_63:57; + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_mmioh_redirect_config0_s { + unsigned long nasid:7; /* RW */ + unsigned long rsvd_7_63:57; + } s5; }; /* ========================================================================= */ -/* UVH_LB_BAU_SB_DESCRIPTOR_BASE */ +/* UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1 */ /* ========================================================================= */ -#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL -#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL -#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL -#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE 0xc8010UL -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE) - -#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 -#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 -#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 -#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9c0 -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE_32 : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_32 : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_32 : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_32) - -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 - -#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 -#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL -#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL - -#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 -#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL -#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL - -#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 -#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL -#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL - -#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 -#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x00003ffffffff000UL -#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL - -#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 53 -#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000ffffffffff000UL -#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0xffe0000000000000UL - -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \ - is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT) - -#define UVH_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK ( \ - is_uv1_hub() ? 
UV1H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \ - is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK) - -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \ - is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK) +#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1 ( \ + is_uv(UV5) ? 0x474800UL : \ + 0) + +#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH ( \ + is_uv(UV5) ? 128 : \ + 0) + + +/* UVYH common defines */ +#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 +#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x000000000000007fUL + + +union uvh_rh10_gam_mmioh_redirect_config1_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh10_gam_mmioh_redirect_config1_s { + unsigned long nasid:7; /* RW */ + unsigned long rsvd_7_63:57; + } s; + + /* UVYH common struct */ + struct uvyh_rh10_gam_mmioh_redirect_config1_s { + unsigned long nasid:7; /* RW */ + unsigned long rsvd_7_63:57; + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_mmioh_redirect_config1_s { + unsigned long nasid:7; /* RW */ + unsigned long rsvd_7_63:57; + } s5; +}; /* ========================================================================= */ -/* UVH_NODE_ID */ +/* UVH_RH10_GAM_MMR_OVERLAY_CONFIG */ /* ========================================================================= */ -#define UVH_NODE_ID 0x0UL -#define UV1H_NODE_ID 0x0UL -#define UV2H_NODE_ID 0x0UL -#define UV3H_NODE_ID 0x0UL -#define UV4H_NODE_ID 0x0UL +#define UVH_RH10_GAM_MMR_OVERLAY_CONFIG ( \ + is_uv(UV5) ? 
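/*
 * Illustrative sketch, not part of the patch: the _DEPTH macro advertises
 * how many redirect entries back a UV5 MMIOH overlay window. The overlay
 * CONFIG0/CONFIG1 registers each define an address range; the matching
 * REDIRECT_CONFIG0/1 array maps each slice of that range to a target NASID.
 * The 8-byte stride between entries is an assumption for illustration only;
 * the patch defines just the base offset and the depth.
 */
static void uv5_dump_mmioh0_redirects(void)
{
	union uvh_rh10_gam_mmioh_redirect_config0_u redir;
	int i;

	/* DEPTH evaluates to 0 on non-UV5, so the loop self-guards. */
	for (i = 0; i < UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH; i++) {
		redir.v = uv_read_local_mmr(
				UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0 + i * 8);
		pr_info("mmioh0 redirect[%d] -> nasid %lu\n", i,
			(unsigned long)redir.s5.nasid);
	}
}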
0x470090UL : \ + 0) -#define UVH_NODE_ID_FORCE1_SHFT 0 -#define UVH_NODE_ID_MANUFACTURER_SHFT 1 -#define UVH_NODE_ID_PART_NUMBER_SHFT 12 -#define UVH_NODE_ID_REVISION_SHFT 28 -#define UVH_NODE_ID_NODE_ID_SHFT 32 -#define UVH_NODE_ID_FORCE1_MASK 0x0000000000000001UL -#define UVH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL -#define UVH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL -#define UVH_NODE_ID_REVISION_MASK 0x00000000f0000000UL -#define UVH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL - -#define UV1H_NODE_ID_FORCE1_SHFT 0 -#define UV1H_NODE_ID_MANUFACTURER_SHFT 1 -#define UV1H_NODE_ID_PART_NUMBER_SHFT 12 -#define UV1H_NODE_ID_REVISION_SHFT 28 -#define UV1H_NODE_ID_NODE_ID_SHFT 32 -#define UV1H_NODE_ID_NODES_PER_BIT_SHFT 48 -#define UV1H_NODE_ID_NI_PORT_SHFT 56 -#define UV1H_NODE_ID_FORCE1_MASK 0x0000000000000001UL -#define UV1H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL -#define UV1H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL -#define UV1H_NODE_ID_REVISION_MASK 0x00000000f0000000UL -#define UV1H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL -#define UV1H_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL -#define UV1H_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL - -#define UVXH_NODE_ID_FORCE1_SHFT 0 -#define UVXH_NODE_ID_MANUFACTURER_SHFT 1 -#define UVXH_NODE_ID_PART_NUMBER_SHFT 12 -#define UVXH_NODE_ID_REVISION_SHFT 28 -#define UVXH_NODE_ID_NODE_ID_SHFT 32 -#define UVXH_NODE_ID_NODES_PER_BIT_SHFT 50 -#define UVXH_NODE_ID_NI_PORT_SHFT 57 -#define UVXH_NODE_ID_FORCE1_MASK 0x0000000000000001UL -#define UVXH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL -#define UVXH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL -#define UVXH_NODE_ID_REVISION_MASK 0x00000000f0000000UL -#define UVXH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL -#define UVXH_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL -#define UVXH_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL -#define UV2H_NODE_ID_FORCE1_SHFT 0 -#define UV2H_NODE_ID_MANUFACTURER_SHFT 1 -#define UV2H_NODE_ID_PART_NUMBER_SHFT 12 -#define UV2H_NODE_ID_REVISION_SHFT 28 -#define UV2H_NODE_ID_NODE_ID_SHFT 32 -#define UV2H_NODE_ID_NODES_PER_BIT_SHFT 50 -#define UV2H_NODE_ID_NI_PORT_SHFT 57 -#define UV2H_NODE_ID_FORCE1_MASK 0x0000000000000001UL -#define UV2H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL -#define UV2H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL -#define UV2H_NODE_ID_REVISION_MASK 0x00000000f0000000UL -#define UV2H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL -#define UV2H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL -#define UV2H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL - -#define UV3H_NODE_ID_FORCE1_SHFT 0 -#define UV3H_NODE_ID_MANUFACTURER_SHFT 1 -#define UV3H_NODE_ID_PART_NUMBER_SHFT 12 -#define UV3H_NODE_ID_REVISION_SHFT 28 -#define UV3H_NODE_ID_NODE_ID_SHFT 32 -#define UV3H_NODE_ID_ROUTER_SELECT_SHFT 48 -#define UV3H_NODE_ID_RESERVED_2_SHFT 49 -#define UV3H_NODE_ID_NODES_PER_BIT_SHFT 50 -#define UV3H_NODE_ID_NI_PORT_SHFT 57 -#define UV3H_NODE_ID_FORCE1_MASK 0x0000000000000001UL -#define UV3H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL -#define UV3H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL -#define UV3H_NODE_ID_REVISION_MASK 0x00000000f0000000UL -#define UV3H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL -#define UV3H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL -#define UV3H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL -#define UV3H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL -#define UV3H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL - -#define UV4H_NODE_ID_FORCE1_SHFT 0 -#define UV4H_NODE_ID_MANUFACTURER_SHFT 1 -#define 
UV4H_NODE_ID_PART_NUMBER_SHFT 12 -#define UV4H_NODE_ID_REVISION_SHFT 28 -#define UV4H_NODE_ID_NODE_ID_SHFT 32 -#define UV4H_NODE_ID_ROUTER_SELECT_SHFT 48 -#define UV4H_NODE_ID_RESERVED_2_SHFT 49 -#define UV4H_NODE_ID_NODES_PER_BIT_SHFT 50 -#define UV4H_NODE_ID_NI_PORT_SHFT 57 -#define UV4H_NODE_ID_FORCE1_MASK 0x0000000000000001UL -#define UV4H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL -#define UV4H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL -#define UV4H_NODE_ID_REVISION_MASK 0x00000000f0000000UL -#define UV4H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL -#define UV4H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL -#define UV4H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL -#define UV4H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL -#define UV4H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL +/* UVYH common defines */ +#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT 25 +#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_MASK 0x000ffffffe000000UL +#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL +#define UVH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_MASK ( \ + is_uv(UV5) ? 0x000ffffffe000000UL : \ + 0) +#define UVH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT ( \ + is_uv(UV5) ? 25 : \ + -1) -union uvh_node_id_u { +union uvh_rh10_gam_mmr_overlay_config_u { unsigned long v; - struct uvh_node_id_s { - unsigned long force1:1; /* RO */ - unsigned long manufacturer:11; /* RO */ - unsigned long part_number:16; /* RO */ - unsigned long revision:4; /* RO */ - unsigned long node_id:15; /* RW */ - unsigned long rsvd_47_63:17; + + /* UVH common struct */ + struct uvh_rh10_gam_mmr_overlay_config_s { + unsigned long undef_0_24:25; /* Undefined */ + unsigned long base:27; /* RW */ + unsigned long undef_52_62:11; /* Undefined */ + unsigned long enable:1; /* RW */ } s; - struct uv1h_node_id_s { - unsigned long force1:1; /* RO */ - unsigned long manufacturer:11; /* RO */ - unsigned long part_number:16; /* RO */ - unsigned long revision:4; /* RO */ - unsigned long node_id:15; /* RW */ - unsigned long rsvd_47:1; - unsigned long nodes_per_bit:7; /* RW */ - unsigned long rsvd_55:1; - unsigned long ni_port:4; /* RO */ - unsigned long rsvd_60_63:4; - } s1; - struct uvxh_node_id_s { - unsigned long force1:1; /* RO */ - unsigned long manufacturer:11; /* RO */ - unsigned long part_number:16; /* RO */ - unsigned long revision:4; /* RO */ - unsigned long node_id:15; /* RW */ - unsigned long rsvd_47_49:3; - unsigned long nodes_per_bit:7; /* RO */ - unsigned long ni_port:5; /* RO */ - unsigned long rsvd_62_63:2; - } sx; - struct uv2h_node_id_s { - unsigned long force1:1; /* RO */ - unsigned long manufacturer:11; /* RO */ - unsigned long part_number:16; /* RO */ - unsigned long revision:4; /* RO */ - unsigned long node_id:15; /* RW */ - unsigned long rsvd_47_49:3; - unsigned long nodes_per_bit:7; /* RO */ - unsigned long ni_port:5; /* RO */ - unsigned long rsvd_62_63:2; - } s2; - struct uv3h_node_id_s { - unsigned long force1:1; /* RO */ - unsigned long manufacturer:11; /* RO */ - unsigned long part_number:16; /* RO */ - unsigned long revision:4; /* RO */ - unsigned long node_id:15; /* RW */ - unsigned long rsvd_47:1; - unsigned long router_select:1; /* RO */ - unsigned long rsvd_49:1; - unsigned long nodes_per_bit:7; /* RO */ - unsigned long ni_port:5; /* RO */ - unsigned long rsvd_62_63:2; - } s3; - struct uv4h_node_id_s { - unsigned long force1:1; /* RO */ - unsigned long manufacturer:11; /* RO */ - unsigned long part_number:16; /* RO */ - unsigned long 
revision:4; /* RO */ - unsigned long node_id:15; /* RW */ - unsigned long rsvd_47:1; - unsigned long router_select:1; /* RO */ - unsigned long rsvd_49:1; - unsigned long nodes_per_bit:7; /* RO */ - unsigned long ni_port:5; /* RO */ - unsigned long rsvd_62_63:2; - } s4; + + /* UVYH common struct */ + struct uvyh_rh10_gam_mmr_overlay_config_s { + unsigned long undef_0_24:25; /* Undefined */ + unsigned long base:27; /* RW */ + unsigned long undef_52_62:11; /* Undefined */ + unsigned long enable:1; /* RW */ + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_mmr_overlay_config_s { + unsigned long undef_0_24:25; /* Undefined */ + unsigned long base:27; /* RW */ + unsigned long undef_52_62:11; /* Undefined */ + unsigned long enable:1; /* RW */ + } s5; }; /* ========================================================================= */ -/* UVH_NODE_PRESENT_TABLE */ +/* UVH_RH_GAM_ADDR_MAP_CONFIG */ /* ========================================================================= */ -#define UVH_NODE_PRESENT_TABLE 0x1400UL +#define UVH_RH_GAM_ADDR_MAP_CONFIG ( \ + is_uv(UV4) ? 0x480000UL : \ + is_uv(UV3) ? 0x1600000UL : \ + is_uv(UV2) ? 0x1600000UL : \ + 0) -#define UV1H_NODE_PRESENT_TABLE_DEPTH 16 -#define UV2H_NODE_PRESENT_TABLE_DEPTH 16 -#define UV3H_NODE_PRESENT_TABLE_DEPTH 16 -#define UV4H_NODE_PRESENT_TABLE_DEPTH 4 -#define UVH_NODE_PRESENT_TABLE_DEPTH ( \ - is_uv1_hub() ? UV1H_NODE_PRESENT_TABLE_DEPTH : \ - is_uv2_hub() ? UV2H_NODE_PRESENT_TABLE_DEPTH : \ - is_uv3_hub() ? UV3H_NODE_PRESENT_TABLE_DEPTH : \ - /*is_uv4_hub*/ UV4H_NODE_PRESENT_TABLE_DEPTH) -#define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0 -#define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL +/* UVXH common defines */ +#define UVXH_RH_GAM_ADDR_MAP_CONFIG_N_SKT_SHFT 6 +#define UVXH_RH_GAM_ADDR_MAP_CONFIG_N_SKT_MASK 0x00000000000003c0UL +/* UV3 unique defines */ +#define UV3H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_SHFT 0 +#define UV3H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_MASK 0x000000000000003fUL -union uvh_node_present_table_u { +/* UV2 unique defines */ +#define UV2H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_SHFT 0 +#define UV2H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_MASK 0x000000000000003fUL + + +union uvh_rh_gam_addr_map_config_u { unsigned long v; - struct uvh_node_present_table_s { - unsigned long nodes:64; /* RW */ + + /* UVH common struct */ + struct uvh_rh_gam_addr_map_config_s { + unsigned long rsvd_0_5:6; + unsigned long n_skt:4; /* RW */ + unsigned long rsvd_10_63:54; } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_addr_map_config_s { + unsigned long rsvd_0_5:6; + unsigned long n_skt:4; /* RW */ + unsigned long rsvd_10_63:54; + } sx; + + /* UV4 unique struct */ + struct uv4h_rh_gam_addr_map_config_s { + unsigned long rsvd_0_5:6; + unsigned long n_skt:4; /* RW */ + unsigned long rsvd_10_63:54; + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_addr_map_config_s { + unsigned long m_skt:6; /* RW */ + unsigned long n_skt:4; /* RW */ + unsigned long rsvd_10_63:54; + } s3; + + /* UV2 unique struct */ + struct uv2h_rh_gam_addr_map_config_s { + unsigned long m_skt:6; /* RW */ + unsigned long n_skt:4; /* RW */ + unsigned long rsvd_10_63:54; + } s2; }; /* ========================================================================= */ -/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR */ +/* UVH_RH_GAM_ALIAS_0_OVERLAY_CONFIG */ /* ========================================================================= */ -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL -#define 
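/*
 * Illustrative sketch, not part of the patch: RH_GAM_ADDR_MAP_CONFIG carries
 * the global address map geometry. m_skt is the number of per-socket address
 * bits (a UV2/UV3 field; reserved on UV4) and n_skt the number of socket-id
 * bits, so socket s would own the 2^m_skt-byte window starting at
 * (s << m_skt). The helper name and the windowing arithmetic are assumptions
 * for illustration.
 */
static unsigned long uv_gam_socket_base(int socket)
{
	union uvh_rh_gam_addr_map_config_u cfg;

	if (!is_uv(UV2) && !is_uv(UV3))
		return 0;	/* m_skt is only defined on UV2/UV3 */

	cfg.v = uv_read_local_mmr(UVH_RH_GAM_ADDR_MAP_CONFIG);
	/* s3 and s2 share the same m_skt/n_skt layout. */
	return (unsigned long)socket << cfg.s3.m_skt;
}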
UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x4800c8UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR : \ - is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR) - -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL - - -union uvh_rh_gam_alias210_overlay_config_0_mmr_u { +#define UVH_RH_GAM_ALIAS_0_OVERLAY_CONFIG ( \ + is_uv(UV4) ? 0x4800c8UL : \ + is_uv(UV3) ? 0x16000c8UL : \ + is_uv(UV2) ? 
0x16000c8UL : \ + 0) + + +/* UVXH common defines */ +#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + + +union uvh_rh_gam_alias_0_overlay_config_u { unsigned long v; - struct uvh_rh_gam_alias210_overlay_config_0_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_alias_0_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; @@ -3095,15 +3465,9 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } s; - struct uv1h_rh_gam_alias210_overlay_config_0_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } s1; - struct uvxh_rh_gam_alias210_overlay_config_0_mmr_s { + + /* UVXH common struct */ + struct uvxh_rh_gam_alias_0_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; @@ -3111,15 +3475,19 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } sx; - struct uv2h_rh_gam_alias210_overlay_config_0_mmr_s { + + /* UV4 unique struct */ + struct uv4h_rh_gam_alias_0_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; unsigned long m_alias:5; /* RW */ unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ - } s2; - struct uv3h_rh_gam_alias210_overlay_config_0_mmr_s { + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_alias_0_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; @@ -3127,183 +3495,96 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } s3; - struct uv4h_rh_gam_alias210_overlay_config_0_mmr_s { + + /* UV2 unique struct */ + struct uv2h_rh_gam_alias_0_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; unsigned long m_alias:5; /* RW */ unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ - } s4; + } s2; }; /* ========================================================================= */ -/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR */ +/* UVH_RH_GAM_ALIAS_0_REDIRECT_CONFIG */ /* ========================================================================= */ -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x4800d8UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR : \ - is_uv3_hub() ? 
UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR) - -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL - - -union uvh_rh_gam_alias210_overlay_config_1_mmr_u { +#define UVH_RH_GAM_ALIAS_0_REDIRECT_CONFIG ( \ + is_uv(UV4) ? 0x4800d0UL : \ + is_uv(UV3) ? 0x16000d0UL : \ + is_uv(UV2) ? 
0x16000d0UL : \ + 0) + + +/* UVXH common defines */ +#define UVXH_RH_GAM_ALIAS_0_REDIRECT_CONFIG_DEST_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS_0_REDIRECT_CONFIG_DEST_BASE_MASK 0x00003fffff000000UL + + +union uvh_rh_gam_alias_0_redirect_config_u { unsigned long v; - struct uvh_rh_gam_alias210_overlay_config_1_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_alias_0_redirect_config_s { unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; } s; - struct uv1h_rh_gam_alias210_overlay_config_1_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } s1; - struct uvxh_rh_gam_alias210_overlay_config_1_mmr_s { + + /* UVXH common struct */ + struct uvxh_rh_gam_alias_0_redirect_config_s { unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; } sx; - struct uv2h_rh_gam_alias210_overlay_config_1_mmr_s { + + /* UV4 unique struct */ + struct uv4h_rh_gam_alias_0_redirect_config_s { unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } s2; - struct uv3h_rh_gam_alias210_overlay_config_1_mmr_s { + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_alias_0_redirect_config_s { unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; } s3; - struct uv4h_rh_gam_alias210_overlay_config_1_mmr_s { + + /* UV2 unique struct */ + struct uv2h_rh_gam_alias_0_redirect_config_s { unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } s4; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s2; }; /* ========================================================================= */ -/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR */ +/* UVH_RH_GAM_ALIAS_1_OVERLAY_CONFIG */ /* ========================================================================= */ -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x4800e8UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR : \ - is_uv3_hub() ? 
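/*
 * Illustrative sketch, not part of the patch: an ALIAS_n OVERLAY/REDIRECT
 * pair works together. When enable is set, the window at (base << 24) is
 * forwarded to the address (dest_base << 24). Reading m_alias as the log2
 * window size is an assumption for illustration; only the field layout
 * comes from the patch.
 */
static void uv_alias0_window(unsigned long *start, unsigned long *size,
			     unsigned long *target)
{
	union uvh_rh_gam_alias_0_overlay_config_u ov;
	union uvh_rh_gam_alias_0_redirect_config_u rd;

	ov.v = uv_read_local_mmr(UVH_RH_GAM_ALIAS_0_OVERLAY_CONFIG);
	rd.v = uv_read_local_mmr(UVH_RH_GAM_ALIAS_0_REDIRECT_CONFIG);

	*start  = ov.s.enable ? (unsigned long)ov.s.base << 24 : 0;
	*size   = ov.s.enable ? 1UL << ov.s.m_alias : 0;
	*target = (unsigned long)rd.s.dest_base << 24;
}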
UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR) - -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL - - -union uvh_rh_gam_alias210_overlay_config_2_mmr_u { +#define UVH_RH_GAM_ALIAS_1_OVERLAY_CONFIG ( \ + is_uv(UV4) ? 0x4800d8UL : \ + is_uv(UV3) ? 0x16000d8UL : \ + is_uv(UV2) ? 
0x16000d8UL : \ + 0) + + +/* UVXH common defines */ +#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + + +union uvh_rh_gam_alias_1_overlay_config_u { unsigned long v; - struct uvh_rh_gam_alias210_overlay_config_2_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_alias_1_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; @@ -3311,15 +3592,9 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } s; - struct uv1h_rh_gam_alias210_overlay_config_2_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } s1; - struct uvxh_rh_gam_alias210_overlay_config_2_mmr_s { + + /* UVXH common struct */ + struct uvxh_rh_gam_alias_1_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; @@ -3327,15 +3602,19 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } sx; - struct uv2h_rh_gam_alias210_overlay_config_2_mmr_s { + + /* UV4 unique struct */ + struct uv4h_rh_gam_alias_1_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; unsigned long m_alias:5; /* RW */ unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ - } s2; - struct uv3h_rh_gam_alias210_overlay_config_2_mmr_s { + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_alias_1_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; @@ -3343,390 +3622,289 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } s3; - struct uv4h_rh_gam_alias210_overlay_config_2_mmr_s { + + /* UV2 unique struct */ + struct uv2h_rh_gam_alias_1_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; unsigned long m_alias:5; /* RW */ unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ - } s4; + } s2; }; /* ========================================================================= */ -/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */ +/* UVH_RH_GAM_ALIAS_1_REDIRECT_CONFIG */ /* ========================================================================= */ -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL -#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x4800d0UL -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR : \ - is_uv3_hub() ? 
UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR) - -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL +#define UVH_RH_GAM_ALIAS_1_REDIRECT_CONFIG ( \ + is_uv(UV4) ? 0x4800e0UL : \ + is_uv(UV3) ? 0x16000e0UL : \ + is_uv(UV2) ? 0x16000e0UL : \ + 0) -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL -#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 -#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL +/* UVXH common defines */ +#define UVXH_RH_GAM_ALIAS_1_REDIRECT_CONFIG_DEST_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS_1_REDIRECT_CONFIG_DEST_BASE_MASK 0x00003fffff000000UL -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 -#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL - - -union uvh_rh_gam_alias210_redirect_config_0_mmr_u { +union uvh_rh_gam_alias_1_redirect_config_u { unsigned long v; - struct uvh_rh_gam_alias210_redirect_config_0_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_alias_1_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } s; - struct uv1h_rh_gam_alias210_redirect_config_0_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } s1; - struct uvxh_rh_gam_alias210_redirect_config_0_mmr_s { + + /* UVXH common struct */ + struct uvxh_rh_gam_alias_1_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } sx; - struct uv2h_rh_gam_alias210_redirect_config_0_mmr_s { + + /* UV4 unique struct */ + struct uv4h_rh_gam_alias_1_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; - } s2; - struct uv3h_rh_gam_alias210_redirect_config_0_mmr_s { + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_alias_1_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } s3; - struct uv4h_rh_gam_alias210_redirect_config_0_mmr_s { + + /* UV2 unique struct */ + struct uv2h_rh_gam_alias_1_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; - } s4; + } s2; }; /* ========================================================================= */ -/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */ +/* UVH_RH_GAM_ALIAS_2_OVERLAY_CONFIG */ /* ========================================================================= */ -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL -#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x4800e0UL -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR : \ - is_uv3_hub() ? 
UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR) - -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL +#define UVH_RH_GAM_ALIAS_2_OVERLAY_CONFIG ( \ + is_uv(UV4) ? 0x4800e8UL : \ + is_uv(UV3) ? 0x16000e8UL : \ + is_uv(UV2) ? 0x16000e8UL : \ + 0) -#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 -#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL +/* UVXH common defines */ +#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL -#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 -#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL - - -union uvh_rh_gam_alias210_redirect_config_1_mmr_u { +union uvh_rh_gam_alias_2_overlay_config_u { unsigned long v; - struct uvh_rh_gam_alias210_redirect_config_1_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_alias_2_overlay_config_s { unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ } s; - struct uv1h_rh_gam_alias210_redirect_config_1_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } s1; - struct uvxh_rh_gam_alias210_redirect_config_1_mmr_s { + + /* UVXH common struct */ + struct uvxh_rh_gam_alias_2_overlay_config_s { unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ } sx; - struct uv2h_rh_gam_alias210_redirect_config_1_mmr_s { + + /* UV4 unique struct */ + struct uv4h_rh_gam_alias_2_overlay_config_s { unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } s2; - struct uv3h_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_alias_2_overlay_config_s { unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ } s3; - struct 
uv4h_rh_gam_alias210_redirect_config_1_mmr_s { + + /* UV2 unique struct */ + struct uv2h_rh_gam_alias_2_overlay_config_s { unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } s4; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s2; }; /* ========================================================================= */ -/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */ +/* UVH_RH_GAM_ALIAS_2_REDIRECT_CONFIG */ /* ========================================================================= */ -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL -#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x4800f0UL -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR : \ - is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR) +#define UVH_RH_GAM_ALIAS_2_REDIRECT_CONFIG ( \ + is_uv(UV4) ? 0x4800f0UL : \ + is_uv(UV3) ? 0x16000f0UL : \ + is_uv(UV2) ? 0x16000f0UL : \ + 0) -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL +/* UVXH common defines */ +#define UVXH_RH_GAM_ALIAS_2_REDIRECT_CONFIG_DEST_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS_2_REDIRECT_CONFIG_DEST_BASE_MASK 0x00003fffff000000UL -#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 -#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 -#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL - - -union uvh_rh_gam_alias210_redirect_config_2_mmr_u { +union uvh_rh_gam_alias_2_redirect_config_u { unsigned long v; - struct uvh_rh_gam_alias210_redirect_config_2_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_alias_2_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } s; - struct uv1h_rh_gam_alias210_redirect_config_2_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } s1; - struct uvxh_rh_gam_alias210_redirect_config_2_mmr_s { + + /* UVXH common struct */ + struct uvxh_rh_gam_alias_2_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } sx; - struct uv2h_rh_gam_alias210_redirect_config_2_mmr_s { + + /* UV4 unique struct */ + struct uv4h_rh_gam_alias_2_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; - } s2; - struct uv3h_rh_gam_alias210_redirect_config_2_mmr_s { + } s4; + + /* UV3 unique 
struct */ + struct uv3h_rh_gam_alias_2_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } s3; - struct uv4h_rh_gam_alias210_redirect_config_2_mmr_s { + + /* UV2 unique struct */ + struct uv2h_rh_gam_alias_2_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; - } s4; -}; - -/* ========================================================================= */ -/* UVH_RH_GAM_CONFIG_MMR */ -/* ========================================================================= */ -#define UV1H_RH_GAM_CONFIG_MMR 0x1600000UL -#define UV2H_RH_GAM_CONFIG_MMR 0x1600000UL -#define UV3H_RH_GAM_CONFIG_MMR 0x1600000UL -#define UV4H_RH_GAM_CONFIG_MMR 0x480000UL -#define UVH_RH_GAM_CONFIG_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_CONFIG_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_CONFIG_MMR : \ - is_uv3_hub() ? UV3H_RH_GAM_CONFIG_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_CONFIG_MMR) - -#define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 -#define UVH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL - -#define UV1H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 -#define UV1H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 -#define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_SHFT 12 -#define UV1H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL -#define UV1H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL -#define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL - -#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 -#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL - -#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 -#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 -#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL -#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL - -#define UV3H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 -#define UV3H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 -#define UV3H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL -#define UV3H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL - -#define UV4H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 -#define UV4H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL - - -union uvh_rh_gam_config_mmr_u { - unsigned long v; - struct uvh_rh_gam_config_mmr_s { - unsigned long rsvd_0_5:6; - unsigned long n_skt:4; /* RW */ - unsigned long rsvd_10_63:54; - } s; - struct uv1h_rh_gam_config_mmr_s { - unsigned long m_skt:6; /* RW */ - unsigned long n_skt:4; /* RW */ - unsigned long rsvd_10_11:2; - unsigned long mmiol_cfg:1; /* RW */ - unsigned long rsvd_13_63:51; - } s1; - struct uvxh_rh_gam_config_mmr_s { - unsigned long rsvd_0_5:6; - unsigned long n_skt:4; /* RW */ - unsigned long rsvd_10_63:54; - } sx; - struct uv2h_rh_gam_config_mmr_s { - unsigned long m_skt:6; /* RW */ - unsigned long n_skt:4; /* RW */ - unsigned long rsvd_10_63:54; } s2; - struct uv3h_rh_gam_config_mmr_s { - unsigned long m_skt:6; /* RW */ - unsigned long n_skt:4; /* RW */ - unsigned long rsvd_10_63:54; - } s3; - struct uv4h_rh_gam_config_mmr_s { - unsigned long rsvd_0_5:6; - unsigned long n_skt:4; /* RW */ - unsigned long rsvd_10_63:54; - } s4; }; /* ========================================================================= */ -/* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ +/* UVH_RH_GAM_GRU_OVERLAY_CONFIG */ /* ========================================================================= */ -#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL -#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL -#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL -#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x480010UL -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR ( \ 
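/*
 * Illustrative sketch, not part of the patch: ALIAS_1 and ALIAS_2 repeat the
 * ALIAS_0 layout exactly, so the three pairs can be handled table-driven
 * with a single union type. The address macros are runtime conditionals, so
 * the lookup table must be built in automatic storage rather than as a
 * static initializer.
 */
static bool uv_alias_enabled(int idx)
{
	const unsigned long overlay[] = {
		UVH_RH_GAM_ALIAS_0_OVERLAY_CONFIG,
		UVH_RH_GAM_ALIAS_1_OVERLAY_CONFIG,
		UVH_RH_GAM_ALIAS_2_OVERLAY_CONFIG,
	};
	union uvh_rh_gam_alias_0_overlay_config_u ov;

	ov.v = uv_read_local_mmr(overlay[idx]);
	return ov.s.enable;	/* bit layout is identical for all three */
}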
- is_uv1_hub() ? UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR : \ - is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR) - -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 -#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48 -#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 -#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL -#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL -#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL -#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 -#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL -#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 -#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 -#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL -#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL -#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 -#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 -#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_SHFT 62 -#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL -#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL -#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_MASK 0x4000000000000000UL -#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 26 -#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 -#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL -#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK ( \ - is_uv1_hub() ? UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK : \ - is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK : \ - is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK : \ - /*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK) -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT ( \ - is_uv1_hub() ? UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT : \ - is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT : \ - is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT : \ - /*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT) - -union uvh_rh_gam_gru_overlay_config_mmr_u { +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG ( \ + is_uv(UV4) ? 0x480010UL : \ + is_uv(UV3) ? 0x1600010UL : \ + is_uv(UV2) ? 
0x1600010UL : \ + 0) + + +/* UVXH common defines */ +#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_N_GRU_SHFT 52 +#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_N_GRU_MASK 0x00f0000000000000UL +#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +/* UV4A unique defines */ +#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 26 +#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x000ffffffc000000UL + +/* UV4 unique defines */ +#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 26 +#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x00003ffffc000000UL + +/* UV3 unique defines */ +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 28 +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x00003ffff0000000UL +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MODE_SHFT 62 +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MODE_MASK 0x4000000000000000UL + +/* UV2 unique defines */ +#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 28 +#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x00003ffff0000000UL + +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK ( \ + is_uv(UV4A) ? 0x000ffffffc000000UL : \ + is_uv(UV4) ? 0x00003ffffc000000UL : \ + is_uv(UV3) ? 0x00003ffff0000000UL : \ + is_uv(UV2) ? 0x00003ffff0000000UL : \ + 0) +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT ( \ + is_uv(UV4) ? 26 : \ + is_uv(UV3) ? 28 : \ + is_uv(UV2) ? 28 : \ + -1) + +union uvh_rh_gam_gru_overlay_config_u { unsigned long v; - struct uvh_rh_gam_gru_overlay_config_mmr_s { - unsigned long rsvd_0_51:52; + + /* UVH common struct */ + struct uvh_rh_gam_gru_overlay_config_s { + unsigned long rsvd_0_45:46; + unsigned long rsvd_46_51:6; unsigned long n_gru:4; /* RW */ unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ } s; - struct uv1h_rh_gam_gru_overlay_config_mmr_s { - unsigned long rsvd_0_27:28; - unsigned long base:18; /* RW */ - unsigned long rsvd_46_47:2; - unsigned long gr4:1; /* RW */ - unsigned long rsvd_49_51:3; - unsigned long n_gru:4; /* RW */ - unsigned long rsvd_56_62:7; - unsigned long enable:1; /* RW */ - } s1; - struct uvxh_rh_gam_gru_overlay_config_mmr_s { + + /* UVXH common struct */ + struct uvxh_rh_gam_gru_overlay_config_s { unsigned long rsvd_0_45:46; unsigned long rsvd_46_51:6; unsigned long n_gru:4; /* RW */ unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ } sx; - struct uv2h_rh_gam_gru_overlay_config_mmr_s { - unsigned long rsvd_0_27:28; - unsigned long base:18; /* RW */ + + /* UV4A unique struct */ + struct uv4ah_rh_gam_gru_overlay_config_s { + unsigned long rsvd_0_24:25; + unsigned long undef_25:1; /* Undefined */ + unsigned long base:26; /* RW */ + unsigned long n_gru:4; /* RW */ + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s4a; + + /* UV4 unique struct */ + struct uv4h_rh_gam_gru_overlay_config_s { + unsigned long rsvd_0_24:25; + unsigned long undef_25:1; /* Undefined */ + unsigned long base:20; /* RW */ unsigned long rsvd_46_51:6; unsigned long n_gru:4; /* RW */ unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ - } s2; - struct uv3h_rh_gam_gru_overlay_config_mmr_s { + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_gru_overlay_config_s { unsigned long rsvd_0_27:28; unsigned long base:18; /* RW */ unsigned long rsvd_46_51:6; @@ -3735,88 +3913,141 @@ union uvh_rh_gam_gru_overlay_config_mmr_u { unsigned long mode:1; /* RW */ unsigned long enable:1; /* RW */ } s3; - struct uv4h_rh_gam_gru_overlay_config_mmr_s { - unsigned long rsvd_0_24:25; - unsigned long undef_25:1; /* Undefined */ - unsigned long base:20; 
/* RW */ + + /* UV2 unique struct */ + struct uv2h_rh_gam_gru_overlay_config_s { + unsigned long rsvd_0_27:28; + unsigned long base:18; /* RW */ unsigned long rsvd_46_51:6; unsigned long n_gru:4; /* RW */ unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ - } s4; + } s2; }; /* ========================================================================= */ -/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR */ +/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG */ /* ========================================================================= */ -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR") -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR") -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x1603000UL -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x483000UL -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR) - - -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26 -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46 -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63 -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 52 -#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x000ffffffc000000UL -#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x03f0000000000000UL -#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT ( \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT : \ - is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT) - -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK ( \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK : \ - is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK) - -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK ( \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK : \ - is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK) - -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK ( \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK : \ - is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK) - -union uvh_rh_gam_mmioh_overlay_config0_mmr_u { +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG ( \ + is_uv(UV2) ? 
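/*
 * Illustrative sketch, not part of the patch: the per-hub BASE_MASK/BASE_SHFT
 * selectors above exist because the GRU base field moved and widened across
 * hub generations (shift 28 on UV2/UV3, shift 26 on UV4, a wider mask on
 * UV4A). Since the field sits at its natural address position, masking the
 * raw value already yields a physical address.
 */
static unsigned long uv_gru_base_paddr(void)
{
	union uvh_rh_gam_gru_overlay_config_u gru;

	gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG);
	if (!gru.s.enable)
		return 0;

	/* equivalent to ((v & MASK) >> SHFT) << SHFT */
	return gru.v & UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK;
}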
0x1600030UL : \ + 0) + + + +/* UV2 unique defines */ +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT 27 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_MASK 0x00003ffff8000000UL +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_M_IO_SHFT 46 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_M_IO_MASK 0x000fc00000000000UL +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_N_IO_SHFT 52 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_N_IO_MASK 0x00f0000000000000UL +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT ( \ + is_uv(UV2) ? 27 : \ + uv_undefined("UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT")) + +union uvh_rh_gam_mmioh_overlay_config_u { unsigned long v; - struct uv3h_rh_gam_mmioh_overlay_config0_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_mmioh_overlay_config_s { + unsigned long rsvd_0_26:27; + unsigned long base:19; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; /* RW */ + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_mmioh_overlay_config_s { + unsigned long rsvd_0_26:27; + unsigned long base:19; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; /* RW */ + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } sx; + + /* UV2 unique struct */ + struct uv2h_rh_gam_mmioh_overlay_config_s { + unsigned long rsvd_0_26:27; + unsigned long base:19; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; /* RW */ + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s2; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0 */ +/* ========================================================================= */ +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0 ( \ + is_uv(UV4) ? 0x483000UL : \ + is_uv(UV3) ? 0x1603000UL : \ + 0) + +/* UV4A unique defines */ +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x000ffffffc000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 52 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x03f0000000000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL + +/* UV4 unique defines */ +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x00003ffffc000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 46 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x000fc00000000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL + +/* UV3 unique defines */ +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x00003ffffc000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 46 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x000fc00000000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK ( \ + is_uv(UV4A) ? 0x000ffffffc000000UL : \ + is_uv(UV4) ? 0x00003ffffc000000UL : \ + is_uv(UV3) ? 0x00003ffffc000000UL : \ + 0) +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT ( \ + is_uv(UV4) ? 26 : \ + is_uv(UV3) ? 
26 : \ + -1) + +union uvh_rh_gam_mmioh_overlay_config0_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh_gam_mmioh_overlay_config0_s { unsigned long rsvd_0_25:26; unsigned long base:20; /* RW */ unsigned long m_io:6; /* RW */ unsigned long n_io:4; unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ - } s3; - struct uv4h_rh_gam_mmioh_overlay_config0_mmr_s { + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_mmioh_overlay_config0_s { unsigned long rsvd_0_25:26; unsigned long base:20; /* RW */ unsigned long m_io:6; /* RW */ unsigned long n_io:4; unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ - } s4; + } sx; + + /* UV4A unique struct */ struct uv4ah_rh_gam_mmioh_overlay_config0_mmr_s { unsigned long rsvd_0_25:26; unsigned long base:26; /* RW */ @@ -3825,73 +4056,94 @@ union uvh_rh_gam_mmioh_overlay_config0_mmr_u { unsigned long undef_62:1; /* Undefined */ unsigned long enable:1; /* RW */ } s4a; + + /* UV4 unique struct */ + struct uv4h_rh_gam_mmioh_overlay_config0_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_mmioh_overlay_config0_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s3; }; /* ========================================================================= */ -/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR */ +/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1 */ /* ========================================================================= */ -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR") -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR") -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1603000UL -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x484000UL -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR) - - -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26 -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46 -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63 -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 52 -#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x000ffffffc000000UL -#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x03f0000000000000UL - -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT ( \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT : \ - is_uv4a_hub() ? 
UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT) - -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK ( \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK : \ - is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK) - -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK ( \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK : \ - is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK) - -union uvh_rh_gam_mmioh_overlay_config1_mmr_u { +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1 ( \ + is_uv(UV4) ? 0x484000UL : \ + is_uv(UV3) ? 0x1604000UL : \ + 0) + +/* UV4A unique defines */ +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x000ffffffc000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 52 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x03f0000000000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL + +/* UV4 unique defines */ +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x00003ffffc000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 46 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x000fc00000000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL + +/* UV3 unique defines */ +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x00003ffffc000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 46 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x000fc00000000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK ( \ + is_uv(UV4A) ? 0x000ffffffc000000UL : \ + is_uv(UV4) ? 0x00003ffffc000000UL : \ + is_uv(UV3) ? 0x00003ffffc000000UL : \ + 0) +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT ( \ + is_uv(UV4) ? 26 : \ + is_uv(UV3) ? 
26 : \ + -1) + +union uvh_rh_gam_mmioh_overlay_config1_u { unsigned long v; - struct uv3h_rh_gam_mmioh_overlay_config1_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_mmioh_overlay_config1_s { unsigned long rsvd_0_25:26; unsigned long base:20; /* RW */ unsigned long m_io:6; /* RW */ unsigned long n_io:4; unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ - } s3; - struct uv4h_rh_gam_mmioh_overlay_config1_mmr_s { + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_mmioh_overlay_config1_s { unsigned long rsvd_0_25:26; unsigned long base:20; /* RW */ unsigned long m_io:6; /* RW */ unsigned long n_io:4; unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ - } s4; + } sx; + + /* UV4A unique struct */ struct uv4ah_rh_gam_mmioh_overlay_config1_mmr_s { unsigned long rsvd_0_25:26; unsigned long base:26; /* RW */ @@ -3900,278 +4152,289 @@ union uvh_rh_gam_mmioh_overlay_config1_mmr_u { unsigned long undef_62:1; /* Undefined */ unsigned long enable:1; /* RW */ } s4a; -}; -/* ========================================================================= */ -/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR */ -/* ========================================================================= */ -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR uv_undefined("UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR") -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR uv_undefined("UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR") -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR : \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR) - - -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30 -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52 -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003fffc0000000UL -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - - -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 27 -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52 -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff8000000UL -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - - -union uvh_rh_gam_mmioh_overlay_config_mmr_u { - unsigned long v; - struct uv1h_rh_gam_mmioh_overlay_config_mmr_s { - unsigned long rsvd_0_29:30; - unsigned long base:16; /* RW */ + /* UV4 unique struct */ + struct uv4h_rh_gam_mmioh_overlay_config1_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ unsigned long m_io:6; /* RW */ - unsigned long n_io:4; /* RW */ + unsigned long n_io:4; unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ - } s1; - struct uv2h_rh_gam_mmioh_overlay_config_mmr_s { - unsigned long rsvd_0_26:27; - unsigned long base:19; /* RW */ + } s4; + + /* UV3 unique struct */ + struct 
uv3h_rh_gam_mmioh_overlay_config1_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ unsigned long m_io:6; /* RW */ - unsigned long n_io:4; /* RW */ + unsigned long n_io:4; unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ - } s2; + } s3; }; /* ========================================================================= */ -/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR */ +/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0 */ /* ========================================================================= */ -#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR") -#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR") -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x1603800UL -#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x483800UL -#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR) - -#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH") -#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH") -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128 -#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128 -#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH ( \ - is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \ - is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH) - - -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0 -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL - -#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0 -#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL - -#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000000fffUL - -#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK ( \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK : \ - is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK) - -union uvh_rh_gam_mmioh_redirect_config0_mmr_u { +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0 ( \ + is_uv(UV4) ? 0x483800UL : \ + is_uv(UV3) ? 0x1603800UL : \ + 0) + +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH ( \ + is_uv(UV4) ? 128 : \ + is_uv(UV3) ? 128 : \ + 0) + +/* UV4A unique defines */ +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0 +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000000fffUL + +/* UV4 unique defines */ +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0 +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000007fffUL + +/* UV3 unique defines */ +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0 +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000007fffUL + +/* UVH common defines */ +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK ( \ + is_uv(UV4A) ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \ + is_uv(UV4) ? UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \ + is_uv(UV3) ? 
UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \ + 0) + + +union uvh_rh_gam_mmioh_redirect_config0_u { unsigned long v; - struct uv3h_rh_gam_mmioh_redirect_config0_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_mmioh_redirect_config0_s { unsigned long nasid:15; /* RW */ unsigned long rsvd_15_63:49; - } s3; - struct uv4h_rh_gam_mmioh_redirect_config0_mmr_s { + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_mmioh_redirect_config0_s { unsigned long nasid:15; /* RW */ unsigned long rsvd_15_63:49; - } s4; - struct uv4ah_rh_gam_mmioh_redirect_config0_mmr_s { + } sx; + + struct uv4ah_rh_gam_mmioh_redirect_config0_s { unsigned long nasid:12; /* RW */ unsigned long rsvd_12_63:52; } s4a; + + /* UV4 unique struct */ + struct uv4h_rh_gam_mmioh_redirect_config0_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_mmioh_redirect_config0_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s3; }; /* ========================================================================= */ -/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR */ +/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1 */ /* ========================================================================= */ -#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR") -#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR") -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x1604800UL -#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x484800UL -#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR) - -#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH") -#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH") -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128 -#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128 -#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH ( \ - is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \ - is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH) - - -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0 -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL - -#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0 -#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL - -#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000000fffUL - -#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK ( \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK : \ - is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK) - -union uvh_rh_gam_mmioh_redirect_config1_mmr_u { +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1 ( \ + is_uv(UV4) ? 0x484800UL : \ + is_uv(UV3) ? 0x1604800UL : \ + 0) + +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH ( \ + is_uv(UV4) ? 128 : \ + is_uv(UV3) ? 
128 : \ + 0) + +/* UV4A unique defines */ +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000000fffUL + +/* UV4 unique defines */ +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000007fffUL + +/* UV3 unique defines */ +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000007fffUL + +/* UVH common defines */ +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK ( \ + is_uv(UV4A) ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \ + is_uv(UV4) ? UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \ + is_uv(UV3) ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \ + 0) + + +union uvh_rh_gam_mmioh_redirect_config1_u { unsigned long v; - struct uv3h_rh_gam_mmioh_redirect_config1_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_mmioh_redirect_config1_s { unsigned long nasid:15; /* RW */ unsigned long rsvd_15_63:49; - } s3; - struct uv4h_rh_gam_mmioh_redirect_config1_mmr_s { + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_mmioh_redirect_config1_s { unsigned long nasid:15; /* RW */ unsigned long rsvd_15_63:49; - } s4; - struct uv4ah_rh_gam_mmioh_redirect_config1_mmr_s { + } sx; + + struct uv4ah_rh_gam_mmioh_redirect_config1_s { unsigned long nasid:12; /* RW */ unsigned long rsvd_12_63:52; } s4a; + + /* UV4 unique struct */ + struct uv4h_rh_gam_mmioh_redirect_config1_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_mmioh_redirect_config1_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s3; }; /* ========================================================================= */ -/* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */ +/* UVH_RH_GAM_MMR_OVERLAY_CONFIG */ /* ========================================================================= */ -#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL -#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL -#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL -#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x480028UL -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR : \ - is_uv3_hub() ? 
UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR) - -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 -#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46 -#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL -#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 -#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL -#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 -#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 -#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 -#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - - -union uvh_rh_gam_mmr_overlay_config_mmr_u { +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG ( \ + is_uv(UV4) ? 0x480028UL : \ + is_uv(UV3) ? 0x1600028UL : \ + is_uv(UV2) ? 0x1600028UL : \ + 0) + + +/* UVXH common defines */ +#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT 26 +#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_MASK ( \ + is_uv(UV4A) ? 0x000ffffffc000000UL : \ + is_uv(UV4) ? 0x00003ffffc000000UL : \ + is_uv(UV3) ? 0x00003ffffc000000UL : \ + is_uv(UV2) ? 0x00003ffffc000000UL : \ + 0) +#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +/* UV4A unique defines */ +#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 26 +#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x000ffffffc000000UL + +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_MASK ( \ + is_uv(UV4A) ? 0x000ffffffc000000UL : \ + is_uv(UV4) ? 0x00003ffffc000000UL : \ + is_uv(UV3) ? 0x00003ffffc000000UL : \ + is_uv(UV2) ? 0x00003ffffc000000UL : \ + 0) + +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT ( \ + is_uv(UV4) ? 26 : \ + is_uv(UV3) ? 26 : \ + is_uv(UV2) ? 
26 : \ + -1) + +union uvh_rh_gam_mmr_overlay_config_u { unsigned long v; - struct uvh_rh_gam_mmr_overlay_config_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_mmr_overlay_config_s { unsigned long rsvd_0_25:26; unsigned long base:20; /* RW */ unsigned long rsvd_46_62:17; unsigned long enable:1; /* RW */ } s; - struct uv1h_rh_gam_mmr_overlay_config_mmr_s { - unsigned long rsvd_0_25:26; - unsigned long base:20; /* RW */ - unsigned long dual_hub:1; /* RW */ - unsigned long rsvd_47_62:16; - unsigned long enable:1; /* RW */ - } s1; - struct uvxh_rh_gam_mmr_overlay_config_mmr_s { + + /* UVXH common struct */ + struct uvxh_rh_gam_mmr_overlay_config_s { unsigned long rsvd_0_25:26; unsigned long base:20; /* RW */ unsigned long rsvd_46_62:17; unsigned long enable:1; /* RW */ } sx; - struct uv2h_rh_gam_mmr_overlay_config_mmr_s { + + /* UV4 unique struct */ + struct uv4h_rh_gam_mmr_overlay_config_s { unsigned long rsvd_0_25:26; unsigned long base:20; /* RW */ unsigned long rsvd_46_62:17; unsigned long enable:1; /* RW */ - } s2; - struct uv3h_rh_gam_mmr_overlay_config_mmr_s { + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_mmr_overlay_config_s { unsigned long rsvd_0_25:26; unsigned long base:20; /* RW */ unsigned long rsvd_46_62:17; unsigned long enable:1; /* RW */ } s3; - struct uv4h_rh_gam_mmr_overlay_config_mmr_s { + + /* UV2 unique struct */ + struct uv2h_rh_gam_mmr_overlay_config_s { unsigned long rsvd_0_25:26; unsigned long base:20; /* RW */ unsigned long rsvd_46_62:17; unsigned long enable:1; /* RW */ - } s4; + } s2; }; /* ========================================================================= */ /* UVH_RTC */ /* ========================================================================= */ -#define UV1H_RTC 0x340000UL -#define UV2H_RTC 0x340000UL -#define UV3H_RTC 0x340000UL -#define UV4H_RTC 0xe0000UL #define UVH_RTC ( \ - is_uv1_hub() ? UV1H_RTC : \ - is_uv2_hub() ? UV2H_RTC : \ - is_uv3_hub() ? UV3H_RTC : \ - /*is_uv4_hub*/ UV4H_RTC) + is_uv(UV5) ? 0xe0000UL : \ + is_uv(UV4) ? 0xe0000UL : \ + is_uv(UV3) ? 0x340000UL : \ + is_uv(UV2) ? 
0x340000UL : \ + 0) +/* UVH common defines*/ #define UVH_RTC_REAL_TIME_CLOCK_SHFT 0 #define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL union uvh_rtc_u { unsigned long v; + + /* UVH common struct */ struct uvh_rtc_s { unsigned long real_time_clock:56; /* RW */ unsigned long rsvd_56_63:8; } s; + + /* UV5 unique struct */ + struct uv5h_rtc_s { + unsigned long real_time_clock:56; /* RW */ + unsigned long rsvd_56_63:8; + } s5; + + /* UV4 unique struct */ + struct uv4h_rtc_s { + unsigned long real_time_clock:56; /* RW */ + unsigned long rsvd_56_63:8; + } s4; + + /* UV3 unique struct */ + struct uv3h_rtc_s { + unsigned long real_time_clock:56; /* RW */ + unsigned long rsvd_56_63:8; + } s3; + + /* UV2 unique struct */ + struct uv2h_rtc_s { + unsigned long real_time_clock:56; /* RW */ + unsigned long rsvd_56_63:8; + } s2; }; /* ========================================================================= */ @@ -4179,26 +4442,29 @@ union uvh_rtc_u { /* ========================================================================= */ #define UVH_RTC1_INT_CONFIG 0x615c0UL +/* UVH common defines*/ #define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0 -#define UVH_RTC1_INT_CONFIG_DM_SHFT 8 -#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11 -#define UVH_RTC1_INT_CONFIG_STATUS_SHFT 12 -#define UVH_RTC1_INT_CONFIG_P_SHFT 13 -#define UVH_RTC1_INT_CONFIG_T_SHFT 15 -#define UVH_RTC1_INT_CONFIG_M_SHFT 16 -#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32 #define UVH_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_RTC1_INT_CONFIG_DM_SHFT 8 #define UVH_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11 #define UVH_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_RTC1_INT_CONFIG_STATUS_SHFT 12 #define UVH_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_RTC1_INT_CONFIG_P_SHFT 13 #define UVH_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_RTC1_INT_CONFIG_T_SHFT 15 #define UVH_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_RTC1_INT_CONFIG_M_SHFT 16 #define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32 #define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL union uvh_rtc1_int_config_u { unsigned long v; + + /* UVH common struct */ struct uvh_rtc1_int_config_s { unsigned long vector_:8; /* RW */ unsigned long dm:3; /* RW */ @@ -4211,618 +4477,175 @@ union uvh_rtc1_int_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; -}; - -/* ========================================================================= */ -/* UVH_SCRATCH5 */ -/* ========================================================================= */ -#define UV1H_SCRATCH5 0x2d0200UL -#define UV2H_SCRATCH5 0x2d0200UL -#define UV3H_SCRATCH5 0x2d0200UL -#define UV4H_SCRATCH5 0xb0200UL -#define UVH_SCRATCH5 ( \ - is_uv1_hub() ? UV1H_SCRATCH5 : \ - is_uv2_hub() ? UV2H_SCRATCH5 : \ - is_uv3_hub() ? UV3H_SCRATCH5 : \ - /*is_uv4_hub*/ UV4H_SCRATCH5) - -#define UV1H_SCRATCH5_32 0x778 -#define UV2H_SCRATCH5_32 0x778 -#define UV3H_SCRATCH5_32 0x778 -#define UV4H_SCRATCH5_32 0x798 -#define UVH_SCRATCH5_32 ( \ - is_uv1_hub() ? UV1H_SCRATCH5_32 : \ - is_uv2_hub() ? UV2H_SCRATCH5_32 : \ - is_uv3_hub() ? 
UV3H_SCRATCH5_32 : \ - /*is_uv4_hub*/ UV4H_SCRATCH5_32) - -#define UVH_SCRATCH5_SCRATCH5_SHFT 0 -#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL - -union uvh_scratch5_u { - unsigned long v; - struct uvh_scratch5_s { - unsigned long scratch5:64; /* RW, W1CS */ - } s; -}; - -/* ========================================================================= */ -/* UVH_SCRATCH5_ALIAS */ -/* ========================================================================= */ -#define UV1H_SCRATCH5_ALIAS 0x2d0208UL -#define UV2H_SCRATCH5_ALIAS 0x2d0208UL -#define UV3H_SCRATCH5_ALIAS 0x2d0208UL -#define UV4H_SCRATCH5_ALIAS 0xb0208UL -#define UVH_SCRATCH5_ALIAS ( \ - is_uv1_hub() ? UV1H_SCRATCH5_ALIAS : \ - is_uv2_hub() ? UV2H_SCRATCH5_ALIAS : \ - is_uv3_hub() ? UV3H_SCRATCH5_ALIAS : \ - /*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS) - -#define UV1H_SCRATCH5_ALIAS_32 0x780 -#define UV2H_SCRATCH5_ALIAS_32 0x780 -#define UV3H_SCRATCH5_ALIAS_32 0x780 -#define UV4H_SCRATCH5_ALIAS_32 0x7a0 -#define UVH_SCRATCH5_ALIAS_32 ( \ - is_uv1_hub() ? UV1H_SCRATCH5_ALIAS_32 : \ - is_uv2_hub() ? UV2H_SCRATCH5_ALIAS_32 : \ - is_uv3_hub() ? UV3H_SCRATCH5_ALIAS_32 : \ - /*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS_32) - - -/* ========================================================================= */ -/* UVH_SCRATCH5_ALIAS_2 */ -/* ========================================================================= */ -#define UV1H_SCRATCH5_ALIAS_2 0x2d0210UL -#define UV2H_SCRATCH5_ALIAS_2 0x2d0210UL -#define UV3H_SCRATCH5_ALIAS_2 0x2d0210UL -#define UV4H_SCRATCH5_ALIAS_2 0xb0210UL -#define UVH_SCRATCH5_ALIAS_2 ( \ - is_uv1_hub() ? UV1H_SCRATCH5_ALIAS_2 : \ - is_uv2_hub() ? UV2H_SCRATCH5_ALIAS_2 : \ - is_uv3_hub() ? UV3H_SCRATCH5_ALIAS_2 : \ - /*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS_2) -#define UVH_SCRATCH5_ALIAS_2_32 0x788 - - -/* ========================================================================= */ -/* UVXH_EVENT_OCCURRED2 */ -/* ========================================================================= */ -#define UVXH_EVENT_OCCURRED2 0x70100UL - -#define UV2H_EVENT_OCCURRED2_32 0xb68 -#define UV3H_EVENT_OCCURRED2_32 0xb68 -#define UV4H_EVENT_OCCURRED2_32 0x608 -#define UVH_EVENT_OCCURRED2_32 ( \ - is_uv2_hub() ? UV2H_EVENT_OCCURRED2_32 : \ - is_uv3_hub() ? 
UV3H_EVENT_OCCURRED2_32 : \ - /*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_32) - - -#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0 -#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1 -#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2 -#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3 -#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4 -#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5 -#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6 -#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7 -#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8 -#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9 -#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10 -#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11 -#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12 -#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13 -#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14 -#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15 -#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16 -#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17 -#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18 -#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19 -#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20 -#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21 -#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22 -#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23 -#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24 -#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25 -#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26 -#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27 -#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28 -#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29 -#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30 -#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31 -#define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL -#define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL -#define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL -#define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL -#define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL -#define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL -#define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL -#define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL -#define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL -#define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL -#define UV2H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL -#define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL -#define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL -#define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL -#define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL -#define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL -#define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL -#define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL -#define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL -#define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL -#define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL -#define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL -#define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL -#define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL -#define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL -#define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL -#define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL -#define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL -#define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL -#define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL -#define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL -#define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL - -#define UV3H_EVENT_OCCURRED2_RTC_0_SHFT 0 -#define 
UV3H_EVENT_OCCURRED2_RTC_1_SHFT 1 -#define UV3H_EVENT_OCCURRED2_RTC_2_SHFT 2 -#define UV3H_EVENT_OCCURRED2_RTC_3_SHFT 3 -#define UV3H_EVENT_OCCURRED2_RTC_4_SHFT 4 -#define UV3H_EVENT_OCCURRED2_RTC_5_SHFT 5 -#define UV3H_EVENT_OCCURRED2_RTC_6_SHFT 6 -#define UV3H_EVENT_OCCURRED2_RTC_7_SHFT 7 -#define UV3H_EVENT_OCCURRED2_RTC_8_SHFT 8 -#define UV3H_EVENT_OCCURRED2_RTC_9_SHFT 9 -#define UV3H_EVENT_OCCURRED2_RTC_10_SHFT 10 -#define UV3H_EVENT_OCCURRED2_RTC_11_SHFT 11 -#define UV3H_EVENT_OCCURRED2_RTC_12_SHFT 12 -#define UV3H_EVENT_OCCURRED2_RTC_13_SHFT 13 -#define UV3H_EVENT_OCCURRED2_RTC_14_SHFT 14 -#define UV3H_EVENT_OCCURRED2_RTC_15_SHFT 15 -#define UV3H_EVENT_OCCURRED2_RTC_16_SHFT 16 -#define UV3H_EVENT_OCCURRED2_RTC_17_SHFT 17 -#define UV3H_EVENT_OCCURRED2_RTC_18_SHFT 18 -#define UV3H_EVENT_OCCURRED2_RTC_19_SHFT 19 -#define UV3H_EVENT_OCCURRED2_RTC_20_SHFT 20 -#define UV3H_EVENT_OCCURRED2_RTC_21_SHFT 21 -#define UV3H_EVENT_OCCURRED2_RTC_22_SHFT 22 -#define UV3H_EVENT_OCCURRED2_RTC_23_SHFT 23 -#define UV3H_EVENT_OCCURRED2_RTC_24_SHFT 24 -#define UV3H_EVENT_OCCURRED2_RTC_25_SHFT 25 -#define UV3H_EVENT_OCCURRED2_RTC_26_SHFT 26 -#define UV3H_EVENT_OCCURRED2_RTC_27_SHFT 27 -#define UV3H_EVENT_OCCURRED2_RTC_28_SHFT 28 -#define UV3H_EVENT_OCCURRED2_RTC_29_SHFT 29 -#define UV3H_EVENT_OCCURRED2_RTC_30_SHFT 30 -#define UV3H_EVENT_OCCURRED2_RTC_31_SHFT 31 -#define UV3H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL -#define UV3H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL -#define UV3H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL -#define UV3H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL -#define UV3H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL -#define UV3H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL -#define UV3H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL -#define UV3H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL -#define UV3H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL -#define UV3H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL -#define UV3H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL -#define UV3H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL -#define UV3H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL -#define UV3H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL -#define UV3H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL -#define UV3H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL -#define UV3H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL -#define UV3H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL -#define UV3H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL -#define UV3H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL -#define UV3H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL -#define UV3H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL -#define UV3H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL -#define UV3H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL -#define UV3H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL -#define UV3H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL -#define UV3H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL -#define UV3H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL -#define UV3H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL -#define UV3H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL -#define UV3H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL -#define UV3H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL + /* UV5 unique struct */ + struct uv5h_rtc1_int_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long 
status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s5; -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_SHFT 0 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_SHFT 1 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_SHFT 2 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_SHFT 3 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_SHFT 4 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_SHFT 5 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_SHFT 6 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_SHFT 7 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_SHFT 8 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_SHFT 9 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_SHFT 10 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_SHFT 11 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_SHFT 12 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_SHFT 13 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_SHFT 14 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_SHFT 15 -#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT 16 -#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT 17 -#define UV4H_EVENT_OCCURRED2_RTC_0_SHFT 18 -#define UV4H_EVENT_OCCURRED2_RTC_1_SHFT 19 -#define UV4H_EVENT_OCCURRED2_RTC_2_SHFT 20 -#define UV4H_EVENT_OCCURRED2_RTC_3_SHFT 21 -#define UV4H_EVENT_OCCURRED2_RTC_4_SHFT 22 -#define UV4H_EVENT_OCCURRED2_RTC_5_SHFT 23 -#define UV4H_EVENT_OCCURRED2_RTC_6_SHFT 24 -#define UV4H_EVENT_OCCURRED2_RTC_7_SHFT 25 -#define UV4H_EVENT_OCCURRED2_RTC_8_SHFT 26 -#define UV4H_EVENT_OCCURRED2_RTC_9_SHFT 27 -#define UV4H_EVENT_OCCURRED2_RTC_10_SHFT 28 -#define UV4H_EVENT_OCCURRED2_RTC_11_SHFT 29 -#define UV4H_EVENT_OCCURRED2_RTC_12_SHFT 30 -#define UV4H_EVENT_OCCURRED2_RTC_13_SHFT 31 -#define UV4H_EVENT_OCCURRED2_RTC_14_SHFT 32 -#define UV4H_EVENT_OCCURRED2_RTC_15_SHFT 33 -#define UV4H_EVENT_OCCURRED2_RTC_16_SHFT 34 -#define UV4H_EVENT_OCCURRED2_RTC_17_SHFT 35 -#define UV4H_EVENT_OCCURRED2_RTC_18_SHFT 36 -#define UV4H_EVENT_OCCURRED2_RTC_19_SHFT 37 -#define UV4H_EVENT_OCCURRED2_RTC_20_SHFT 38 -#define UV4H_EVENT_OCCURRED2_RTC_21_SHFT 39 -#define UV4H_EVENT_OCCURRED2_RTC_22_SHFT 40 -#define UV4H_EVENT_OCCURRED2_RTC_23_SHFT 41 -#define UV4H_EVENT_OCCURRED2_RTC_24_SHFT 42 -#define UV4H_EVENT_OCCURRED2_RTC_25_SHFT 43 -#define UV4H_EVENT_OCCURRED2_RTC_26_SHFT 44 -#define UV4H_EVENT_OCCURRED2_RTC_27_SHFT 45 -#define UV4H_EVENT_OCCURRED2_RTC_28_SHFT 46 -#define UV4H_EVENT_OCCURRED2_RTC_29_SHFT 47 -#define UV4H_EVENT_OCCURRED2_RTC_30_SHFT 48 -#define UV4H_EVENT_OCCURRED2_RTC_31_SHFT 49 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000001UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000002UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000004UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000008UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000010UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000020UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000040UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000080UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000100UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_MASK 
0x0000000000000200UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000000400UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000000800UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000001000UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000002000UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000004000UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000008000UL -#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK 0x0000000000010000UL -#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK 0x0000000000020000UL -#define UV4H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000040000UL -#define UV4H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000080000UL -#define UV4H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000100000UL -#define UV4H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000200000UL -#define UV4H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000400000UL -#define UV4H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000800000UL -#define UV4H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000001000000UL -#define UV4H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000002000000UL -#define UV4H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000004000000UL -#define UV4H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000008000000UL -#define UV4H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000010000000UL -#define UV4H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000020000000UL -#define UV4H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000040000000UL -#define UV4H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000080000000UL -#define UV4H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000100000000UL -#define UV4H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000200000000UL -#define UV4H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000400000000UL -#define UV4H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000800000000UL -#define UV4H_EVENT_OCCURRED2_RTC_18_MASK 0x0000001000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_19_MASK 0x0000002000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_20_MASK 0x0000004000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_21_MASK 0x0000008000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_22_MASK 0x0000010000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_23_MASK 0x0000020000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_24_MASK 0x0000040000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_25_MASK 0x0000080000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_26_MASK 0x0000100000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_27_MASK 0x0000200000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_28_MASK 0x0000400000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_29_MASK 0x0000800000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_30_MASK 0x0001000000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_31_MASK 0x0002000000000000UL + /* UV4 unique struct */ + struct uv4h_rtc1_int_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s4; -#define UVXH_EVENT_OCCURRED2_RTC_1_MASK ( \ - is_uv2_hub() ? UV2H_EVENT_OCCURRED2_RTC_1_MASK : \ - is_uv3_hub() ? 
UV3H_EVENT_OCCURRED2_RTC_1_MASK : \ - /*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_RTC_1_MASK) + /* UV3 unique struct */ + struct uv3h_rtc1_int_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s3; -union uvh_event_occurred2_u { - unsigned long v; - struct uv2h_event_occurred2_s { - unsigned long rtc_0:1; /* RW */ - unsigned long rtc_1:1; /* RW */ - unsigned long rtc_2:1; /* RW */ - unsigned long rtc_3:1; /* RW */ - unsigned long rtc_4:1; /* RW */ - unsigned long rtc_5:1; /* RW */ - unsigned long rtc_6:1; /* RW */ - unsigned long rtc_7:1; /* RW */ - unsigned long rtc_8:1; /* RW */ - unsigned long rtc_9:1; /* RW */ - unsigned long rtc_10:1; /* RW */ - unsigned long rtc_11:1; /* RW */ - unsigned long rtc_12:1; /* RW */ - unsigned long rtc_13:1; /* RW */ - unsigned long rtc_14:1; /* RW */ - unsigned long rtc_15:1; /* RW */ - unsigned long rtc_16:1; /* RW */ - unsigned long rtc_17:1; /* RW */ - unsigned long rtc_18:1; /* RW */ - unsigned long rtc_19:1; /* RW */ - unsigned long rtc_20:1; /* RW */ - unsigned long rtc_21:1; /* RW */ - unsigned long rtc_22:1; /* RW */ - unsigned long rtc_23:1; /* RW */ - unsigned long rtc_24:1; /* RW */ - unsigned long rtc_25:1; /* RW */ - unsigned long rtc_26:1; /* RW */ - unsigned long rtc_27:1; /* RW */ - unsigned long rtc_28:1; /* RW */ - unsigned long rtc_29:1; /* RW */ - unsigned long rtc_30:1; /* RW */ - unsigned long rtc_31:1; /* RW */ - unsigned long rsvd_32_63:32; + /* UV2 unique struct */ + struct uv2h_rtc1_int_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ } s2; - struct uv3h_event_occurred2_s { - unsigned long rtc_0:1; /* RW */ - unsigned long rtc_1:1; /* RW */ - unsigned long rtc_2:1; /* RW */ - unsigned long rtc_3:1; /* RW */ - unsigned long rtc_4:1; /* RW */ - unsigned long rtc_5:1; /* RW */ - unsigned long rtc_6:1; /* RW */ - unsigned long rtc_7:1; /* RW */ - unsigned long rtc_8:1; /* RW */ - unsigned long rtc_9:1; /* RW */ - unsigned long rtc_10:1; /* RW */ - unsigned long rtc_11:1; /* RW */ - unsigned long rtc_12:1; /* RW */ - unsigned long rtc_13:1; /* RW */ - unsigned long rtc_14:1; /* RW */ - unsigned long rtc_15:1; /* RW */ - unsigned long rtc_16:1; /* RW */ - unsigned long rtc_17:1; /* RW */ - unsigned long rtc_18:1; /* RW */ - unsigned long rtc_19:1; /* RW */ - unsigned long rtc_20:1; /* RW */ - unsigned long rtc_21:1; /* RW */ - unsigned long rtc_22:1; /* RW */ - unsigned long rtc_23:1; /* RW */ - unsigned long rtc_24:1; /* RW */ - unsigned long rtc_25:1; /* RW */ - unsigned long rtc_26:1; /* RW */ - unsigned long rtc_27:1; /* RW */ - unsigned long rtc_28:1; /* RW */ - unsigned long rtc_29:1; /* RW */ - unsigned long rtc_30:1; /* RW */ - unsigned long rtc_31:1; /* RW */ - unsigned long rsvd_32_63:32; - } s3; - struct uv4h_event_occurred2_s { - unsigned long message_accelerator_int0:1; /* RW */ - unsigned long message_accelerator_int1:1; /* RW */ - unsigned long message_accelerator_int2:1; /* RW */ - unsigned long message_accelerator_int3:1; /* RW */ - unsigned long 
message_accelerator_int4:1; /* RW */ - unsigned long message_accelerator_int5:1; /* RW */ - unsigned long message_accelerator_int6:1; /* RW */ - unsigned long message_accelerator_int7:1; /* RW */ - unsigned long message_accelerator_int8:1; /* RW */ - unsigned long message_accelerator_int9:1; /* RW */ - unsigned long message_accelerator_int10:1; /* RW */ - unsigned long message_accelerator_int11:1; /* RW */ - unsigned long message_accelerator_int12:1; /* RW */ - unsigned long message_accelerator_int13:1; /* RW */ - unsigned long message_accelerator_int14:1; /* RW */ - unsigned long message_accelerator_int15:1; /* RW */ - unsigned long rtc_interval_int:1; /* RW */ - unsigned long bau_dashboard_int:1; /* RW */ - unsigned long rtc_0:1; /* RW */ - unsigned long rtc_1:1; /* RW */ - unsigned long rtc_2:1; /* RW */ - unsigned long rtc_3:1; /* RW */ - unsigned long rtc_4:1; /* RW */ - unsigned long rtc_5:1; /* RW */ - unsigned long rtc_6:1; /* RW */ - unsigned long rtc_7:1; /* RW */ - unsigned long rtc_8:1; /* RW */ - unsigned long rtc_9:1; /* RW */ - unsigned long rtc_10:1; /* RW */ - unsigned long rtc_11:1; /* RW */ - unsigned long rtc_12:1; /* RW */ - unsigned long rtc_13:1; /* RW */ - unsigned long rtc_14:1; /* RW */ - unsigned long rtc_15:1; /* RW */ - unsigned long rtc_16:1; /* RW */ - unsigned long rtc_17:1; /* RW */ - unsigned long rtc_18:1; /* RW */ - unsigned long rtc_19:1; /* RW */ - unsigned long rtc_20:1; /* RW */ - unsigned long rtc_21:1; /* RW */ - unsigned long rtc_22:1; /* RW */ - unsigned long rtc_23:1; /* RW */ - unsigned long rtc_24:1; /* RW */ - unsigned long rtc_25:1; /* RW */ - unsigned long rtc_26:1; /* RW */ - unsigned long rtc_27:1; /* RW */ - unsigned long rtc_28:1; /* RW */ - unsigned long rtc_29:1; /* RW */ - unsigned long rtc_30:1; /* RW */ - unsigned long rtc_31:1; /* RW */ - unsigned long rsvd_50_63:14; - } s4; }; /* ========================================================================= */ -/* UVXH_EVENT_OCCURRED2_ALIAS */ -/* ========================================================================= */ -#define UVXH_EVENT_OCCURRED2_ALIAS 0x70108UL - -#define UV2H_EVENT_OCCURRED2_ALIAS_32 0xb70 -#define UV3H_EVENT_OCCURRED2_ALIAS_32 0xb70 -#define UV4H_EVENT_OCCURRED2_ALIAS_32 0x610 -#define UVH_EVENT_OCCURRED2_ALIAS_32 ( \ - is_uv2_hub() ? UV2H_EVENT_OCCURRED2_ALIAS_32 : \ - is_uv3_hub() ? UV3H_EVENT_OCCURRED2_ALIAS_32 : \ - /*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_ALIAS_32) - - -/* ========================================================================= */ -/* UVXH_LB_BAU_SB_ACTIVATION_STATUS_2 */ +/* UVH_SCRATCH5 */ /* ========================================================================= */ -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL -#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2 0xc8130UL -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_2 ( \ - is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_2) - -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0 -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0 -#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0xa10 -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_2_32 ( \ - is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_32) +#define UVH_SCRATCH5 ( \ + is_uv(UV5) ? 0xb0200UL : \ + is_uv(UV4) ? 0xb0200UL : \ + is_uv(UV3) ? 
0x2d0200UL : \ + is_uv(UV2) ? 0x2d0200UL : \ + 0) +#define UV5H_SCRATCH5 0xb0200UL +#define UV4H_SCRATCH5 0xb0200UL +#define UV3H_SCRATCH5 0x2d0200UL +#define UV2H_SCRATCH5 0x2d0200UL -#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 -#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL +/* UVH common defines*/ +#define UVH_SCRATCH5_SCRATCH5_SHFT 0 +#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL +/* UVXH common defines */ +#define UVXH_SCRATCH5_SCRATCH5_SHFT 0 +#define UVXH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL +/* UVYH common defines */ +#define UVYH_SCRATCH5_SCRATCH5_SHFT 0 +#define UVYH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL -#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 -#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL +/* UV5 unique defines */ +#define UV5H_SCRATCH5_SCRATCH5_SHFT 0 +#define UV5H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL +/* UV4 unique defines */ +#define UV4H_SCRATCH5_SCRATCH5_SHFT 0 +#define UV4H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL -union uvxh_lb_bau_sb_activation_status_2_u { - unsigned long v; - struct uvxh_lb_bau_sb_activation_status_2_s { - unsigned long aux_error:64; /* RW */ - } sx; - struct uv2h_lb_bau_sb_activation_status_2_s { - unsigned long aux_error:64; /* RW */ - } s2; - struct uv3h_lb_bau_sb_activation_status_2_s { - unsigned long aux_error:64; /* RW */ - } s3; - struct uv4h_lb_bau_sb_activation_status_2_s { - unsigned long aux_error:64; /* RW */ - } s4; -}; +/* UV3 unique defines */ +#define UV3H_SCRATCH5_SCRATCH5_SHFT 0 +#define UV3H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL -/* ========================================================================= */ -/* UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK */ -/* ========================================================================= */ -#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL -#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x9f0 +/* UV2 unique defines */ +#define UV2H_SCRATCH5_SCRATCH5_SHFT 0 +#define UV2H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL -#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0 -#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL -union uv1h_lb_target_physical_apic_id_mask_u { +union uvh_scratch5_u { unsigned long v; - struct uv1h_lb_target_physical_apic_id_mask_s { - unsigned long bit_enables:32; /* RW */ - unsigned long rsvd_32_63:32; - } s1; -}; -/* ========================================================================= */ -/* UV3H_GR0_GAM_GR_CONFIG */ -/* ========================================================================= */ -#define UV3H_GR0_GAM_GR_CONFIG 0xc00028UL - -#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_SHFT 0 -#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT 10 -#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_MASK 0x000000000000003fUL -#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL + /* UVH common struct */ + struct uvh_scratch5_s { + unsigned long scratch5:64; /* RW */ + } s; -union uv3h_gr0_gam_gr_config_u { - unsigned long v; - struct uv3h_gr0_gam_gr_config_s { - unsigned long m_skt:6; /* RW */ - unsigned long undef_6_9:4; /* Undefined */ - unsigned long subspace:1; /* RW */ - unsigned long reserved:53; - } s3; -}; + 
/* UVXH common struct */ + struct uvxh_scratch5_s { + unsigned long scratch5:64; /* RW */ + } sx; -/* ========================================================================= */ -/* UV4H_LB_PROC_INTD_QUEUE_FIRST */ -/* ========================================================================= */ -#define UV4H_LB_PROC_INTD_QUEUE_FIRST 0xa4100UL + /* UVYH common struct */ + struct uvyh_scratch5_s { + unsigned long scratch5:64; /* RW */ + } sy; -#define UV4H_LB_PROC_INTD_QUEUE_FIRST_FIRST_PAYLOAD_ADDRESS_SHFT 6 -#define UV4H_LB_PROC_INTD_QUEUE_FIRST_FIRST_PAYLOAD_ADDRESS_MASK 0x00003fffffffffc0UL + /* UV5 unique struct */ + struct uv5h_scratch5_s { + unsigned long scratch5:64; /* RW */ + } s5; -union uv4h_lb_proc_intd_queue_first_u { - unsigned long v; - struct uv4h_lb_proc_intd_queue_first_s { - unsigned long undef_0_5:6; /* Undefined */ - unsigned long first_payload_address:40; /* RW */ + /* UV4 unique struct */ + struct uv4h_scratch5_s { + unsigned long scratch5:64; /* RW */ } s4; -}; - -/* ========================================================================= */ -/* UV4H_LB_PROC_INTD_QUEUE_LAST */ -/* ========================================================================= */ -#define UV4H_LB_PROC_INTD_QUEUE_LAST 0xa4108UL -#define UV4H_LB_PROC_INTD_QUEUE_LAST_LAST_PAYLOAD_ADDRESS_SHFT 5 -#define UV4H_LB_PROC_INTD_QUEUE_LAST_LAST_PAYLOAD_ADDRESS_MASK 0x00003fffffffffe0UL + /* UV3 unique struct */ + struct uv3h_scratch5_s { + unsigned long scratch5:64; /* RW */ + } s3; -union uv4h_lb_proc_intd_queue_last_u { - unsigned long v; - struct uv4h_lb_proc_intd_queue_last_s { - unsigned long undef_0_4:5; /* Undefined */ - unsigned long last_payload_address:41; /* RW */ - } s4; + /* UV2 unique struct */ + struct uv2h_scratch5_s { + unsigned long scratch5:64; /* RW */ + } s2; }; /* ========================================================================= */ -/* UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR */ +/* UVH_SCRATCH5_ALIAS */ /* ========================================================================= */ -#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR 0xa4118UL - -#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR_SOFT_ACK_PENDING_FLAGS_SHFT 0 -#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR_SOFT_ACK_PENDING_FLAGS_MASK 0x00000000000000ffUL +#define UVH_SCRATCH5_ALIAS ( \ + is_uv(UV5) ? 0xb0208UL : \ + is_uv(UV4) ? 0xb0208UL : \ + is_uv(UV3) ? 0x2d0208UL : \ + is_uv(UV2) ? 0x2d0208UL : \ + 0) +#define UV5H_SCRATCH5_ALIAS 0xb0208UL +#define UV4H_SCRATCH5_ALIAS 0xb0208UL +#define UV3H_SCRATCH5_ALIAS 0x2d0208UL +#define UV2H_SCRATCH5_ALIAS 0x2d0208UL -union uv4h_lb_proc_intd_soft_ack_clear_u { - unsigned long v; - struct uv4h_lb_proc_intd_soft_ack_clear_s { - unsigned long soft_ack_pending_flags:8; /* WP */ - } s4; -}; /* ========================================================================= */ -/* UV4H_LB_PROC_INTD_SOFT_ACK_PENDING */ +/* UVH_SCRATCH5_ALIAS_2 */ /* ========================================================================= */ -#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING 0xa4110UL - -#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING_SOFT_ACK_FLAGS_SHFT 0 -#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING_SOFT_ACK_FLAGS_MASK 0x00000000000000ffUL +#define UVH_SCRATCH5_ALIAS_2 ( \ + is_uv(UV5) ? 0xb0210UL : \ + is_uv(UV4) ? 0xb0210UL : \ + is_uv(UV3) ? 0x2d0210UL : \ + is_uv(UV2) ? 
0x2d0210UL : \ + 0) +#define UV5H_SCRATCH5_ALIAS_2 0xb0210UL +#define UV4H_SCRATCH5_ALIAS_2 0xb0210UL +#define UV3H_SCRATCH5_ALIAS_2 0x2d0210UL +#define UV2H_SCRATCH5_ALIAS_2 0x2d0210UL -union uv4h_lb_proc_intd_soft_ack_pending_u { - unsigned long v; - struct uv4h_lb_proc_intd_soft_ack_pending_s { - unsigned long soft_ack_flags:8; /* RW */ - } s4; -}; #endif /* _ASM_X86_UV_UV_MMRS_H */ diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 27566e57e87d..b7253ef3205a 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -15,36 +15,29 @@ struct vdso_image { unsigned long size; /* Always a multiple of PAGE_SIZE */ unsigned long alt, alt_len; + unsigned long extable_base, extable_len; + const void *extable; - long sym_vvar_start; /* Negative offset to the vvar area */ - - long sym_vvar_page; - long sym_hpet_page; - long sym_pvclock_page; - long sym_hvclock_page; long sym_VDSO32_NOTE_MASK; long sym___kernel_sigreturn; long sym___kernel_rt_sigreturn; long sym___kernel_vsyscall; long sym_int80_landing_pad; + long sym_vdso32_sigreturn_landing_pad; + long sym_vdso32_rt_sigreturn_landing_pad; }; -#ifdef CONFIG_X86_64 extern const struct vdso_image vdso_image_64; -#endif - -#ifdef CONFIG_X86_X32 extern const struct vdso_image vdso_image_x32; -#endif - -#if defined CONFIG_X86_32 || defined CONFIG_COMPAT extern const struct vdso_image vdso_image_32; -#endif -extern void __init init_vdso_image(const struct vdso_image *image); +extern int __init init_vdso_image(const struct vdso_image *image); extern int map_vdso_once(const struct vdso_image *image, unsigned long addr); +extern bool fixup_vdso_exception(struct pt_regs *regs, int trapnr, + unsigned long error_code, + unsigned long fault_addr); #endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_VDSO_H */ diff --git a/arch/x86/include/asm/vdso/clocksource.h b/arch/x86/include/asm/vdso/clocksource.h new file mode 100644 index 000000000000..136e5e57cfe1 --- /dev/null +++ b/arch/x86/include/asm/vdso/clocksource.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_CLOCKSOURCE_H +#define __ASM_VDSO_CLOCKSOURCE_H + +#define VDSO_ARCH_CLOCKMODES \ + VDSO_CLOCKMODE_TSC, \ + VDSO_CLOCKMODE_PVCLOCK, \ + VDSO_CLOCKMODE_HVCLOCK + +#define HAVE_VDSO_CLOCKMODE_HVCLOCK + +#endif /* __ASM_VDSO_CLOCKSOURCE_H */ diff --git a/arch/x86/include/asm/vdso/getrandom.h b/arch/x86/include/asm/vdso/getrandom.h new file mode 100644 index 000000000000..ff1c11b9fa27 --- /dev/null +++ b/arch/x86/include/asm/vdso/getrandom.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ +#ifndef __ASM_VDSO_GETRANDOM_H +#define __ASM_VDSO_GETRANDOM_H + +#ifndef __ASSEMBLER__ + +#include <asm/unistd.h> + +/** + * getrandom_syscall - Invoke the getrandom() syscall. + * @buffer: Destination buffer to fill with random bytes. + * @len: Size of @buffer in bytes. + * @flags: Zero or more GRND_* flags. + * Returns: The number of random bytes written to @buffer, or a negative value indicating an error. 
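The extable_base/extable fields added to struct vdso_image above back the new fixup_vdso_exception() hook; a sketch of the intended call site, modeled on the page-fault path (the exact handler wiring here is an assumption):

	/* In a trap handler, before raising a signal for a user-mode fault: */
	if (user_mode(regs) &&
	    fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address))
		return;		/* vDSO exception table redirected the fault */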
+ */ +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) +{ + long ret; + + asm ("syscall" : "=a" (ret) : + "0" (__NR_getrandom), "D" (buffer), "S" (len), "d" (flags) : + "rcx", "r11", "memory"); + + return ret; +} + +#endif /* !__ASSEMBLER__ */ + +#endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..73b2e7ee8f0f --- /dev/null +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -0,0 +1,336 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Fast user context implementation of clock_gettime, gettimeofday, and time. + * + * Copyright (C) 2019 ARM Limited. + * Copyright 2006 Andi Kleen, SUSE Labs. + * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> + * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany + */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLER__ + +#include <uapi/linux/time.h> +#include <asm/vgtod.h> +#include <asm/unistd.h> +#include <asm/msr.h> +#include <asm/pvclock.h> +#include <clocksource/hyperv_timer.h> + +#define VDSO_HAS_TIME 1 + +#define VDSO_HAS_CLOCK_GETRES 1 + +/* + * Declare the memory-mapped vclock data pages. These come from hypervisors. + * If we ever reintroduce something like direct access to an MMIO clock like + * the HPET again, it will go here as well. + * + * A load from any of these pages will segfault if the clock in question is + * disabled, so appropriate compiler barriers and checks need to be used + * to prevent stray loads. + * + * These declarations MUST NOT be const. The compiler will assume that + * an extern const variable has genuinely constant contents, and the + * resulting code won't work, since the whole point is that these pages + * change over time, possibly while we're accessing them. + */ + +#ifdef CONFIG_PARAVIRT_CLOCK +/* + * This is the vCPU 0 pvclock page. We only use pvclock from the vDSO + * if the hypervisor tells us that all vCPUs can get valid data from the + * vCPU 0 page. 
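Two notes on the getrandom_syscall() asm above: the x86-64 SYSCALL instruction destroys %rcx and %r11, hence the clobber list, and the "memory" clobber keeps the compiler from caching @buffer contents across the call. A minimal caller, as an illustrative sketch:

	u8 seed[32];
	ssize_t ret = getrandom_syscall(seed, sizeof(seed), 0);

	if (ret < 0)
		return ret;	/* negative errno, e.g. -EINTR */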
+ */ +extern struct pvclock_vsyscall_time_info pvclock_page + __attribute__((visibility("hidden"))); +#endif + +#ifdef CONFIG_HYPERV_TIMER +extern struct ms_hyperv_tsc_page hvclock_page + __attribute__((visibility("hidden"))); +#endif + +#ifndef BUILD_VDSO32 + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ("syscall" : "=a" (ret), "=m" (*_ts) : + "0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) : + "rcx", "r11"); + + return ret; +} + +static __always_inline +long gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + long ret; + + asm("syscall" : "=a" (ret) : + "0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory"); + + return ret; +} + +static __always_inline +long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ("syscall" : "=a" (ret), "=m" (*_ts) : + "0" (__NR_clock_getres), "D" (_clkid), "S" (_ts) : + "rcx", "r11"); + + return ret; +} + +#else + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_gettime64), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +static __always_inline +long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_gettime), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +static __always_inline +long gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + long ret; + + asm( + "mov %%ebx, %%edx \n" + "mov %2, %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret) + : "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz) + : "memory", "edx"); + + return ret; +} + +static __always_inline long +clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_getres_time64), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +static __always_inline +long clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_getres), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +#endif + +#ifdef CONFIG_PARAVIRT_CLOCK +static u64 vread_pvclock(void) +{ + const struct pvclock_vcpu_time_info *pvti = &pvclock_page.pvti; + u32 version; + u64 ret; + + /* + * Note: The kernel and hypervisor must guarantee that cpu ID + * number maps 1:1 to per-CPU pvclock time info. + * + * Because the hypervisor is entirely unaware of guest userspace + * preemption, it cannot guarantee that per-CPU pvclock time + * info is updated if the underlying CPU changes or that that + * version is increased whenever underlying CPU changes. + * + * On KVM, we are guaranteed that pvti updates for any vCPU are + * atomic as seen by *all* vCPUs. This is an even stronger + * guarantee than we get with a normal seqlock. 
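The 32-bit fallbacks above all share one shape: %ebx is the PIC/GOT base register on i386, so it cannot simply be handed to the compiler as a constraint; instead it is parked in %edx around the call to __kernel_vsyscall while the first syscall argument is loaded by hand. A generic sketch of the pattern (illustrative; the real fallbacks use an "=m" output on the timespec rather than a blanket memory clobber):

	long ret;

	asm (
		"mov %%ebx, %%edx \n"	/* save the PIC register */
		"mov %[a1], %%ebx \n"	/* first syscall argument -> %ebx */
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"	/* restore the PIC register */
		: "=a" (ret)
		: "0" (nr), [a1] "g" (arg1), "c" (arg2)
		: "edx", "memory");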
+ * + * On Xen, we don't appear to have that guarantee, but Xen still + * supplies a valid seqlock using the version field. + * + * We only do pvclock vdso timing at all if + * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to + * mean that all vCPUs have matching pvti and that the TSC is + * synced, so we can just look at vCPU 0's pvti. + */ + + do { + version = pvclock_read_begin(pvti); + + if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) + return U64_MAX; + + ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); + } while (pvclock_read_retry(pvti, version)); + + return ret & S64_MAX; +} +#endif + +#ifdef CONFIG_HYPERV_TIMER +static u64 vread_hvclock(void) +{ + u64 tsc, time; + + if (hv_read_tsc_page_tsc(&hvclock_page, &tsc, &time)) + return time & S64_MAX; + + return U64_MAX; +} +#endif + +static inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_time_data *vd) +{ + if (likely(clock_mode == VDSO_CLOCKMODE_TSC)) + return (u64)rdtsc_ordered() & S64_MAX; + /* + * For any memory-mapped vclock type, we need to make sure that gcc + * doesn't cleverly hoist a load before the mode check. Otherwise we + * might end up touching the memory-mapped page even if the vclock in + * question isn't enabled, which will segfault. Hence the barriers. + */ +#ifdef CONFIG_PARAVIRT_CLOCK + if (clock_mode == VDSO_CLOCKMODE_PVCLOCK) { + barrier(); + return vread_pvclock(); + } +#endif +#ifdef CONFIG_HYPERV_TIMER + if (clock_mode == VDSO_CLOCKMODE_HVCLOCK) { + barrier(); + return vread_hvclock(); + } +#endif + return U64_MAX; +} + +static inline bool arch_vdso_clocksource_ok(const struct vdso_clock *vc) +{ + return true; +} +#define vdso_clocksource_ok arch_vdso_clocksource_ok + +/* + * Clocksource read value validation to handle PV and HyperV clocksources + * which can be invalidated asynchronously and indicate invalidation by + * returning U64_MAX, which can be effectively tested by checking for a + * negative value after casting it to s64. + * + * This effectively forces a S64_MAX mask on the calculations, unlike the + * U64_MAX mask normally used by x86 clocksources. + */ +static inline bool arch_vdso_cycles_ok(u64 cycles) +{ + return (s64)cycles >= 0; +} +#define vdso_cycles_ok arch_vdso_cycles_ok + +/* + * x86 specific calculation of nanoseconds for the current cycle count + * + * The regular implementation assumes that clocksource reads are globally + * monotonic. The TSC can be slightly off across sockets which can cause + * the regular delta calculation (@cycles - @last) to return a huge time + * jump. + * + * Therefore it needs to be verified that @cycles are greater than + * @vd->cycles_last. If not then use @vd->cycles_last, which is the base + * time of the current conversion period. + * + * This variant also uses a custom mask because while the clocksource mask of + * all the VDSO capable clocksources on x86 is U64_MAX, the above code uses + * U64_MASK as an exception value, additionally arch_vdso_cycles_ok() above + * declares everything with the MSB/Sign-bit set as invalid. Therefore the + * effective mask is S64_MAX. + */ +static __always_inline u64 vdso_calc_ns(const struct vdso_clock *vc, u64 cycles, u64 base) +{ + u64 delta = cycles - vc->cycle_last; + + /* + * Negative motion and deltas which can cause multiplication + * overflow require special treatment. This check covers both as + * negative motion is guaranteed to be greater than @vc::max_cycles + * due to unsigned comparison. 
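vdso_calc_ns() relies on mul_u64_u32_add_u64_shr() for its overflow path below; its reference semantics, sketched with 128-bit arithmetic (the kernel's actual implementation avoids __int128 on targets that lack it):

	static inline u64 mul_u64_u32_add_u64_shr_ref(u64 a, u32 mul, u64 b,
						      unsigned int shift)
	{
		/* (a * mul + b) >> shift, with a 128-bit intermediate */
		return (u64)(((unsigned __int128)a * mul + b) >> shift);
	}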
+ * + * Due to the MSB/Sign-bit being used as invalid marker (see + * arch_vdso_cycles_ok() above), the effective mask is S64_MAX, but that + * case is also unlikely and will also take the unlikely path here. + */ + if (unlikely(delta > vc->max_cycles)) { + /* + * Due to the above mentioned TSC wobbles, filter out + * negative motion. Per the above masking, the effective + * sign bit is now bit 62. + */ + if (delta & (1ULL << 62)) + return base >> vc->shift; + + /* Handle multiplication overflow gracefully */ + return mul_u64_u32_add_u64_shr(delta & S64_MAX, vc->mult, base, vc->shift); + } + + return ((delta * vc->mult) + base) >> vc->shift; +} +#define vdso_calc_ns vdso_calc_ns + +#endif /* !__ASSEMBLER__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/x86/include/asm/vdso/processor.h b/arch/x86/include/asm/vdso/processor.h new file mode 100644 index 000000000000..7000aeb59aa2 --- /dev/null +++ b/arch/x86/include/asm/vdso/processor.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 ARM Ltd. + */ +#ifndef __ASM_VDSO_PROCESSOR_H +#define __ASM_VDSO_PROCESSOR_H + +#ifndef __ASSEMBLER__ + +/* PAUSE is a good thing to insert into busy-wait loops. */ +static __always_inline void native_pause(void) +{ + asm volatile("pause" ::: "memory"); +} + +static __always_inline void cpu_relax(void) +{ + native_pause(); +} + +struct getcpu_cache; + +notrace long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused); + +#endif /* __ASSEMBLER__ */ + +#endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..4aa311a923f2 --- /dev/null +++ b/arch/x86/include/asm/vdso/vsyscall.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_VSYSCALL_H +#define __ASM_VDSO_VSYSCALL_H + +#define __VDSO_PAGES 6 + +#define VDSO_NR_VCLOCK_PAGES 2 +#define VDSO_VCLOCK_PAGES_START(_b) ((_b) + (__VDSO_PAGES - VDSO_NR_VCLOCK_PAGES) * PAGE_SIZE) +#define VDSO_PAGE_PVCLOCK_OFFSET 0 +#define VDSO_PAGE_HVCLOCK_OFFSET 1 + +#ifndef __ASSEMBLER__ + +#include <vdso/datapage.h> +#include <asm/vgtod.h> + +/* The asm-generic header needs to be included after the definitions above */ +#include <asm-generic/vdso/vsyscall.h> + +#endif /* !__ASSEMBLER__ */ + +#endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h new file mode 100644 index 000000000000..5d471253c755 --- /dev/null +++ b/arch/x86/include/asm/vermagic.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_VERMAGIC_H +#define _ASM_VERMAGIC_H + +#ifdef CONFIG_X86_64 +/* X86_64 does not define MODULE_PROC_FAMILY */ +#elif defined CONFIG_M486SX +#define MODULE_PROC_FAMILY "486SX " +#elif defined CONFIG_M486 +#define MODULE_PROC_FAMILY "486 " +#elif defined CONFIG_M586 +#define MODULE_PROC_FAMILY "586 " +#elif defined CONFIG_M586TSC +#define MODULE_PROC_FAMILY "586TSC " +#elif defined CONFIG_M586MMX +#define MODULE_PROC_FAMILY "586MMX " +#elif defined CONFIG_MATOM +#define MODULE_PROC_FAMILY "ATOM " +#elif defined CONFIG_M686 +#define MODULE_PROC_FAMILY "686 " +#elif defined CONFIG_MPENTIUMII +#define MODULE_PROC_FAMILY "PENTIUMII " +#elif defined CONFIG_MPENTIUMIII +#define MODULE_PROC_FAMILY "PENTIUMIII " +#elif defined CONFIG_MPENTIUMM +#define MODULE_PROC_FAMILY "PENTIUMM " +#elif defined CONFIG_MPENTIUM4 +#define MODULE_PROC_FAMILY "PENTIUM4 " +#elif defined CONFIG_MK6 +#define MODULE_PROC_FAMILY "K6 " 
+#elif defined CONFIG_MK7 +#define MODULE_PROC_FAMILY "K7 " +#elif defined CONFIG_MELAN +#define MODULE_PROC_FAMILY "ELAN " +#elif defined CONFIG_MCRUSOE +#define MODULE_PROC_FAMILY "CRUSOE " +#elif defined CONFIG_MEFFICEON +#define MODULE_PROC_FAMILY "EFFICEON " +#elif defined CONFIG_MWINCHIPC6 +#define MODULE_PROC_FAMILY "WINCHIPC6 " +#elif defined CONFIG_MWINCHIP3D +#define MODULE_PROC_FAMILY "WINCHIP3D " +#elif defined CONFIG_MCYRIXIII +#define MODULE_PROC_FAMILY "CYRIXIII " +#elif defined CONFIG_MVIAC3_2 +#define MODULE_PROC_FAMILY "VIAC3-2 " +#elif defined CONFIG_MVIAC7 +#define MODULE_PROC_FAMILY "VIAC7 " +#elif defined CONFIG_MGEODEGX1 +#define MODULE_PROC_FAMILY "GEODEGX1 " +#elif defined CONFIG_MGEODE_LX +#define MODULE_PROC_FAMILY "GEODE " +#else +#error unknown processor family +#endif + +#ifdef CONFIG_X86_32 +# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY +#else +# define MODULE_ARCH_VERMAGIC "" +#endif + +#endif /* _ASM_VERMAGIC_H */ diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 913a133f8e6f..a0ce291abcae 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -2,92 +2,18 @@ #ifndef _ASM_X86_VGTOD_H #define _ASM_X86_VGTOD_H -#include <linux/compiler.h> -#include <linux/clocksource.h> - -#include <uapi/linux/time.h> - -#ifdef BUILD_VDSO32_64 -typedef u64 gtod_long_t; -#else -typedef unsigned long gtod_long_t; -#endif - -/* - * There is one of these objects in the vvar page for each - * vDSO-accelerated clockid. For high-resolution clocks, this encodes - * the time corresponding to vsyscall_gtod_data.cycle_last. For coarse - * clocks, this encodes the actual time. - * - * To confuse the reader, for high-resolution clocks, nsec is left-shifted - * by vsyscall_gtod_data.shift. - */ -struct vgtod_ts { - u64 sec; - u64 nsec; -}; - -#define VGTOD_BASES (CLOCK_TAI + 1) -#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC) | BIT(CLOCK_TAI)) -#define VGTOD_COARSE (BIT(CLOCK_REALTIME_COARSE) | BIT(CLOCK_MONOTONIC_COARSE)) - /* - * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time - * so be carefull by modifying this structure. + * This check is required to prevent ARCH=um to include + * unwanted headers. 
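MODULE_PROC_FAMILY exists purely so that 32-bit modules built for one CPU family refuse to load on a kernel built for another; on x86-64 the vermagic carries no family tag. A sketch of how it surfaces (simplified from include/linux/vermagic.h, omitting the SMP/preempt/unload flags):

	#define VERMAGIC_STRING	UTS_RELEASE " " MODULE_ARCH_VERMAGIC
	/* e.g. "6.9.0 686 " on a CONFIG_M686 build; just "6.9.0 " on x86-64 */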
*/ -struct vsyscall_gtod_data { - unsigned int seq; - - int vclock_mode; - u64 cycle_last; - u64 mask; - u32 mult; - u32 shift; - - struct vgtod_ts basetime[VGTOD_BASES]; - - int tz_minuteswest; - int tz_dsttime; -}; -extern struct vsyscall_gtod_data vsyscall_gtod_data; - -extern int vclocks_used; -static inline bool vclock_was_used(int vclock) -{ - return READ_ONCE(vclocks_used) & (1 << vclock); -} - -static inline unsigned int gtod_read_begin(const struct vsyscall_gtod_data *s) -{ - unsigned int ret; - -repeat: - ret = READ_ONCE(s->seq); - if (unlikely(ret & 1)) { - cpu_relax(); - goto repeat; - } - smp_rmb(); - return ret; -} - -static inline int gtod_read_retry(const struct vsyscall_gtod_data *s, - unsigned int start) -{ - smp_rmb(); - return unlikely(s->seq != start); -} +#ifdef CONFIG_GENERIC_GETTIMEOFDAY +#include <linux/compiler.h> +#include <asm/clocksource.h> +#include <vdso/datapage.h> +#include <vdso/helpers.h> -static inline void gtod_write_begin(struct vsyscall_gtod_data *s) -{ - ++s->seq; - smp_wmb(); -} +#include <uapi/linux/time.h> -static inline void gtod_write_end(struct vsyscall_gtod_data *s) -{ - smp_wmb(); - ++s->seq; -} +#endif /* CONFIG_GENERIC_GETTIMEOFDAY */ #endif /* _ASM_X86_VGTOD_H */ diff --git a/arch/x86/include/asm/video.h b/arch/x86/include/asm/video.h new file mode 100644 index 000000000000..08ec328203ef --- /dev/null +++ b/arch/x86/include/asm/video.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_VIDEO_H +#define _ASM_X86_VIDEO_H + +#include <linux/types.h> + +#include <asm/page.h> + +struct device; + +pgprot_t pgprot_framebuffer(pgprot_t prot, + unsigned long vm_start, unsigned long vm_end, + unsigned long offset); +#define pgprot_framebuffer pgprot_framebuffer + +#ifdef CONFIG_VIDEO +bool video_is_primary_device(struct device *dev); +#define video_is_primary_device video_is_primary_device +#endif + +#include <asm-generic/video.h> + +#endif /* _ASM_X86_VIDEO_H */ diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h deleted file mode 100644 index 1fc7a0d1e877..000000000000 --- a/arch/x86/include/asm/virtext.h +++ /dev/null @@ -1,129 +0,0 @@ -/* CPU virtualization extensions handling - * - * This should carry the code for handling CPU virtualization extensions - * that needs to live in the kernel core. - * - * Author: Eduardo Habkost <ehabkost@redhat.com> - * - * Copyright (C) 2008, Red Hat Inc. - * - * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc. - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - */ -#ifndef _ASM_X86_VIRTEX_H -#define _ASM_X86_VIRTEX_H - -#include <asm/processor.h> - -#include <asm/vmx.h> -#include <asm/svm.h> -#include <asm/tlbflush.h> - -/* - * VMX functions: - */ - -static inline int cpu_has_vmx(void) -{ - unsigned long ecx = cpuid_ecx(1); - return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ -} - - -/** Disable VMX on the current CPU - * - * vmxoff causes a undefined-opcode exception if vmxon was not run - * on the CPU previously. Only call this function if you know VMX - * is enabled. - */ -static inline void cpu_vmxoff(void) -{ - asm volatile ("vmxoff"); - cr4_clear_bits(X86_CR4_VMXE); -} - -static inline int cpu_vmx_enabled(void) -{ - return __read_cr4() & X86_CR4_VMXE; -} - -/** Disable VMX if it is enabled on the current CPU - * - * You shouldn't call this if cpu_has_vmx() returns 0. 
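For reference, the read-side protocol the removed gtod_read_begin()/gtod_read_retry() pair implemented is the classic seqcount loop (an odd sequence count means a writer is mid-update); the generic vDSO datapage helpers that replace it follow the same shape. Sketch against the old structures:

	unsigned int seq;
	u64 sec, nsec;

	do {
		seq  = gtod_read_begin(&vsyscall_gtod_data);	/* spins while odd */
		sec  = vsyscall_gtod_data.basetime[CLOCK_MONOTONIC].sec;
		nsec = vsyscall_gtod_data.basetime[CLOCK_MONOTONIC].nsec;
	} while (gtod_read_retry(&vsyscall_gtod_data, seq));	/* seq moved: retry */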
- */ -static inline void __cpu_emergency_vmxoff(void) -{ - if (cpu_vmx_enabled()) - cpu_vmxoff(); -} - -/** Disable VMX if it is supported and enabled on the current CPU - */ -static inline void cpu_emergency_vmxoff(void) -{ - if (cpu_has_vmx()) - __cpu_emergency_vmxoff(); -} - - - - -/* - * SVM functions: - */ - -/** Check if the CPU has SVM support - * - * You can use the 'msg' arg to get a message describing the problem, - * if the function returns zero. Simply pass NULL if you are not interested - * on the messages; gcc should take care of not generating code for - * the messages on this case. - */ -static inline int cpu_has_svm(const char **msg) -{ - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && - boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) { - if (msg) - *msg = "not amd or hygon"; - return 0; - } - - if (boot_cpu_data.extended_cpuid_level < SVM_CPUID_FUNC) { - if (msg) - *msg = "can't execute cpuid_8000000a"; - return 0; - } - - if (!boot_cpu_has(X86_FEATURE_SVM)) { - if (msg) - *msg = "svm not available"; - return 0; - } - return 1; -} - - -/** Disable SVM on the current CPU - * - * You should call this only if cpu_has_svm() returned true. - */ -static inline void cpu_svm_disable(void) -{ - uint64_t efer; - - wrmsrl(MSR_VM_HSAVE_PA, 0); - rdmsrl(MSR_EFER, efer); - wrmsrl(MSR_EFER, efer & ~EFER_SVME); -} - -/** Makes sure SVM is disabled, if it is supported on the CPU - */ -static inline void cpu_emergency_svm_disable(void) -{ - if (cpu_has_svm(NULL)) - cpu_svm_disable(); -} - -#endif /* _ASM_X86_VIRTEX_H */ diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h index 26efbec94448..62ee19909903 100644 --- a/arch/x86/include/asm/vm86.h +++ b/arch/x86/include/asm/vm86.h @@ -36,7 +36,6 @@ struct vm86 { unsigned long saved_sp0; unsigned long flags; - unsigned long screen_bitmap; unsigned long cpu_type; struct revectored_struct int_revectored; struct revectored_struct int21_revectored; @@ -85,7 +84,7 @@ static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c) static inline void save_v86_state(struct kernel_vm86_regs *a, int b) { } -#define free_vm86(t) do { } while(0) +#define free_vm86(task) do { (void)(task); } while(0) #endif /* CONFIG_VM86 */ diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h new file mode 100644 index 000000000000..49ce331f3ac6 --- /dev/null +++ b/arch/x86/include/asm/vmalloc.h @@ -0,0 +1,26 @@ +#ifndef _ASM_X86_VMALLOC_H +#define _ASM_X86_VMALLOC_H + +#include <asm/cpufeature.h> +#include <asm/page.h> +#include <asm/pgtable_areas.h> + +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP + +#ifdef CONFIG_X86_64 +#define arch_vmap_pud_supported arch_vmap_pud_supported +static inline bool arch_vmap_pud_supported(pgprot_t prot) +{ + return boot_cpu_has(X86_FEATURE_GBPAGES); +} +#endif + +#define arch_vmap_pmd_supported arch_vmap_pmd_supported +static inline bool arch_vmap_pmd_supported(pgprot_t prot) +{ + return boot_cpu_has(X86_FEATURE_PSE); +} + +#endif + +#endif /* _ASM_X86_VMALLOC_H */ diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h new file mode 100644 index 000000000000..c9cf43d5ef23 --- /dev/null +++ b/arch/x86/include/asm/vmware.h @@ -0,0 +1,327 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +#ifndef _ASM_X86_VMWARE_H +#define _ASM_X86_VMWARE_H + +#include <asm/cpufeatures.h> +#include <asm/alternative.h> +#include <linux/stringify.h> + +/* + * VMware hypercall ABI. 
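The free_vm86() tweak above is macro hygiene: the CONFIG_VM86=n stub now evaluates `(void)(task)`, so the argument still counts as used and callers compile warning-free whether or not VM86 is built in. A caller sketch (the argument type is assumed from the in-tree user):

	struct thread_struct *t = &tsk->thread;

	free_vm86(t);	/* VM86=n: expands to (void)(t) - a warning-free no-op */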
+ * + * - Low bandwidth (LB) hypercalls (I/O port based, vmcall and vmmcall) + * have up to 6 input and 6 output arguments passed and returned using + * registers: %eax (arg0), %ebx (arg1), %ecx (arg2), %edx (arg3), + * %esi (arg4), %edi (arg5). + * The following input arguments must be initialized by the caller: + * arg0 - VMWARE_HYPERVISOR_MAGIC + * arg2 - Hypercall command + * arg3 bits [15:0] - Port number, LB and direction flags + * + * - Low bandwidth TDX hypercalls (x86_64 only) are similar to LB + * hypercalls. They also have up to 6 input and 6 output on registers + * arguments, with different argument to register mapping: + * %r12 (arg0), %rbx (arg1), %r13 (arg2), %rdx (arg3), + * %rsi (arg4), %rdi (arg5). + * + * - High bandwidth (HB) hypercalls are I/O port based only. They have + * up to 7 input and 7 output arguments passed and returned using + * registers: %eax (arg0), %ebx (arg1), %ecx (arg2), %edx (arg3), + * %esi (arg4), %edi (arg5), %ebp (arg6). + * The following input arguments must be initialized by the caller: + * arg0 - VMWARE_HYPERVISOR_MAGIC + * arg1 - Hypercall command + * arg3 bits [15:0] - Port number, HB and direction flags + * + * For compatibility purposes, x86_64 systems use only lower 32 bits + * for input and output arguments. + * + * The hypercall definitions differ in the low word of the %edx (arg3) + * in the following way: the old I/O port based interface uses the port + * number to distinguish between high- and low bandwidth versions, and + * uses IN/OUT instructions to define transfer direction. + * + * The new vmcall interface instead uses a set of flags to select + * bandwidth mode and transfer direction. The flags should be loaded + * into arg3 by any user and are automatically replaced by the port + * number if the I/O port method is used. + */ + +#define VMWARE_HYPERVISOR_HB BIT(0) +#define VMWARE_HYPERVISOR_OUT BIT(1) + +#define VMWARE_HYPERVISOR_PORT 0x5658 +#define VMWARE_HYPERVISOR_PORT_HB (VMWARE_HYPERVISOR_PORT | \ + VMWARE_HYPERVISOR_HB) + +#define VMWARE_HYPERVISOR_MAGIC 0x564d5868U + +#define VMWARE_CMD_GETVERSION 10 +#define VMWARE_CMD_GETHZ 45 +#define VMWARE_CMD_GETVCPU_INFO 68 +#define VMWARE_CMD_STEALCLOCK 91 +/* + * Hypercall command mask: + * bits [6:0] command, range [0, 127] + * bits [19:16] sub-command, range [0, 15] + */ +#define VMWARE_CMD_MASK 0xf007fU + +#define CPUID_VMWARE_FEATURES_ECX_VMMCALL BIT(0) +#define CPUID_VMWARE_FEATURES_ECX_VMCALL BIT(1) + +extern unsigned long vmware_hypercall_slow(unsigned long cmd, + unsigned long in1, unsigned long in3, + unsigned long in4, unsigned long in5, + u32 *out1, u32 *out2, u32 *out3, + u32 *out4, u32 *out5); + +#define VMWARE_TDX_VENDOR_LEAF 0x1af7e4909ULL +#define VMWARE_TDX_HCALL_FUNC 1 + +extern unsigned long vmware_tdx_hypercall(unsigned long cmd, + unsigned long in1, unsigned long in3, + unsigned long in4, unsigned long in5, + u32 *out1, u32 *out2, u32 *out3, + u32 *out4, u32 *out5); + +/* + * The low bandwidth call. The low word of %edx is presumed to have OUT bit + * set. The high word of %edx may contain input data from the caller. 
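A caller sketch for the low-bandwidth interface documented above, modeled on the hypervisor-detection logic (the exact validity test is an assumption): the command travels in arg2 (%ecx), the magic in arg0 (%eax), and out1/out2 come back in %ebx/%ecx via vmware_hypercall3(), defined below.

	u32 b, c;
	unsigned long version;

	version = vmware_hypercall3(VMWARE_CMD_GETVERSION, 0, &b, &c);
	if (version != UINT_MAX && b == VMWARE_HYPERVISOR_MAGIC)
		/* running on a VMware hypervisor */;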
+ */ +#define VMWARE_HYPERCALL \ + ALTERNATIVE_2("movw %[port], %%dx\n\t" \ + "inl (%%dx), %%eax", \ + "vmcall", X86_FEATURE_VMCALL, \ + "vmmcall", X86_FEATURE_VMW_VMMCALL) + +static inline +unsigned long vmware_hypercall1(unsigned long cmd, unsigned long in1) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, 0, 0, 0, + NULL, NULL, NULL, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, 0, 0, 0, + NULL, NULL, NULL, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (0) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall3(unsigned long cmd, unsigned long in1, + u32 *out1, u32 *out2) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, 0, 0, 0, + out1, out2, NULL, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, 0, 0, 0, + out1, out2, NULL, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=b" (*out1), "=c" (*out2) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (0) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall4(unsigned long cmd, unsigned long in1, + u32 *out1, u32 *out2, u32 *out3) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, 0, 0, 0, + out1, out2, out3, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, 0, 0, 0, + out1, out2, out3, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (0) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall5(unsigned long cmd, unsigned long in1, + unsigned long in3, unsigned long in4, + unsigned long in5, u32 *out2) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, in3, in4, in5, + NULL, out2, NULL, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, in3, in4, in5, + NULL, out2, NULL, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=c" (*out2) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (in3), + "S" (in4), + "D" (in5) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall6(unsigned long cmd, unsigned long in1, + unsigned long in3, u32 *out2, + u32 *out3, u32 *out4, u32 *out5) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, in3, 0, 0, + NULL, out2, out3, out4, out5); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, in3, 0, 0, + NULL, out2, out3, out4, out5); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=c" (*out2), "=d" (*out3), "=S" (*out4), + "=D" (*out5) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (in3) + : "cc", 
"memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall7(unsigned long cmd, unsigned long in1, + unsigned long in3, unsigned long in4, + unsigned long in5, u32 *out1, + u32 *out2, u32 *out3) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, in3, in4, in5, + out1, out2, out3, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, in3, in4, in5, + out1, out2, out3, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (in3), + "S" (in4), + "D" (in5) + : "cc", "memory"); + return out0; +} + +#ifdef CONFIG_X86_64 +#define VMW_BP_CONSTRAINT "r" +#else +#define VMW_BP_CONSTRAINT "m" +#endif + +/* + * High bandwidth calls are not supported on encrypted memory guests. + * The caller should check cc_platform_has(CC_ATTR_MEM_ENCRYPT) and use + * low bandwidth hypercall if memory encryption is set. + * This assumption simplifies HB hypercall implementation to just I/O port + * based approach without alternative patching. + */ +static inline +unsigned long vmware_hypercall_hb_out(unsigned long cmd, unsigned long in2, + unsigned long in3, unsigned long in4, + unsigned long in5, unsigned long in6, + u32 *out1) +{ + unsigned long out0; + + asm_inline volatile ( + UNWIND_HINT_SAVE + "push %%" _ASM_BP "\n\t" + UNWIND_HINT_UNDEFINED + "mov %[in6], %%" _ASM_BP "\n\t" + "rep outsb\n\t" + "pop %%" _ASM_BP "\n\t" + UNWIND_HINT_RESTORE + : "=a" (out0), "=b" (*out1) + : "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (cmd), + "c" (in2), + "d" (in3 | VMWARE_HYPERVISOR_PORT_HB), + "S" (in4), + "D" (in5), + [in6] VMW_BP_CONSTRAINT (in6) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall_hb_in(unsigned long cmd, unsigned long in2, + unsigned long in3, unsigned long in4, + unsigned long in5, unsigned long in6, + u32 *out1) +{ + unsigned long out0; + + asm_inline volatile ( + UNWIND_HINT_SAVE + "push %%" _ASM_BP "\n\t" + UNWIND_HINT_UNDEFINED + "mov %[in6], %%" _ASM_BP "\n\t" + "rep insb\n\t" + "pop %%" _ASM_BP "\n\t" + UNWIND_HINT_RESTORE + : "=a" (out0), "=b" (*out1) + : "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (cmd), + "c" (in2), + "d" (in3 | VMWARE_HYPERVISOR_PORT_HB), + "S" (in4), + "D" (in5), + [in6] VMW_BP_CONSTRAINT (in6) + : "cc", "memory"); + return out0; +} +#undef VMW_BP_CONSTRAINT +#undef VMWARE_HYPERCALL + +#endif diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 4e4133e86484..c85c50019523 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -1,93 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * vmx.h: VMX Architecture related definitions * Copyright (c) 2004, Intel Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * * A few random additions are: * Copyright (C) 2006 Qumranet * Avi Kivity <avi@qumranet.com> * Yaniv Kamay <yaniv@qumranet.com> - * */ #ifndef VMX_H #define VMX_H #include <linux/bitops.h> +#include <linux/bug.h> #include <linux/types.h> + #include <uapi/asm/vmx.h> +#include <asm/trapnr.h> +#include <asm/vmxfeatures.h> + +#define VMCS_CONTROL_BIT(x) BIT(VMX_FEATURE_##x & 0x1f) /* * Definitions of Primary Processor-Based VM-Execution Controls. */ -#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 -#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 -#define CPU_BASED_HLT_EXITING 0x00000080 -#define CPU_BASED_INVLPG_EXITING 0x00000200 -#define CPU_BASED_MWAIT_EXITING 0x00000400 -#define CPU_BASED_RDPMC_EXITING 0x00000800 -#define CPU_BASED_RDTSC_EXITING 0x00001000 -#define CPU_BASED_CR3_LOAD_EXITING 0x00008000 -#define CPU_BASED_CR3_STORE_EXITING 0x00010000 -#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 -#define CPU_BASED_CR8_STORE_EXITING 0x00100000 -#define CPU_BASED_TPR_SHADOW 0x00200000 -#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 -#define CPU_BASED_MOV_DR_EXITING 0x00800000 -#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 -#define CPU_BASED_USE_IO_BITMAPS 0x02000000 -#define CPU_BASED_MONITOR_TRAP_FLAG 0x08000000 -#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 -#define CPU_BASED_MONITOR_EXITING 0x20000000 -#define CPU_BASED_PAUSE_EXITING 0x40000000 -#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 +#define CPU_BASED_INTR_WINDOW_EXITING VMCS_CONTROL_BIT(INTR_WINDOW_EXITING) +#define CPU_BASED_USE_TSC_OFFSETTING VMCS_CONTROL_BIT(USE_TSC_OFFSETTING) +#define CPU_BASED_HLT_EXITING VMCS_CONTROL_BIT(HLT_EXITING) +#define CPU_BASED_INVLPG_EXITING VMCS_CONTROL_BIT(INVLPG_EXITING) +#define CPU_BASED_MWAIT_EXITING VMCS_CONTROL_BIT(MWAIT_EXITING) +#define CPU_BASED_RDPMC_EXITING VMCS_CONTROL_BIT(RDPMC_EXITING) +#define CPU_BASED_RDTSC_EXITING VMCS_CONTROL_BIT(RDTSC_EXITING) +#define CPU_BASED_CR3_LOAD_EXITING VMCS_CONTROL_BIT(CR3_LOAD_EXITING) +#define CPU_BASED_CR3_STORE_EXITING VMCS_CONTROL_BIT(CR3_STORE_EXITING) +#define CPU_BASED_ACTIVATE_TERTIARY_CONTROLS VMCS_CONTROL_BIT(TERTIARY_CONTROLS) +#define CPU_BASED_CR8_LOAD_EXITING VMCS_CONTROL_BIT(CR8_LOAD_EXITING) +#define CPU_BASED_CR8_STORE_EXITING VMCS_CONTROL_BIT(CR8_STORE_EXITING) +#define CPU_BASED_TPR_SHADOW VMCS_CONTROL_BIT(VIRTUAL_TPR) +#define CPU_BASED_NMI_WINDOW_EXITING VMCS_CONTROL_BIT(NMI_WINDOW_EXITING) +#define CPU_BASED_MOV_DR_EXITING VMCS_CONTROL_BIT(MOV_DR_EXITING) +#define CPU_BASED_UNCOND_IO_EXITING VMCS_CONTROL_BIT(UNCOND_IO_EXITING) +#define CPU_BASED_USE_IO_BITMAPS VMCS_CONTROL_BIT(USE_IO_BITMAPS) +#define CPU_BASED_MONITOR_TRAP_FLAG VMCS_CONTROL_BIT(MONITOR_TRAP_FLAG) +#define CPU_BASED_USE_MSR_BITMAPS VMCS_CONTROL_BIT(USE_MSR_BITMAPS) +#define CPU_BASED_MONITOR_EXITING VMCS_CONTROL_BIT(MONITOR_EXITING) +#define CPU_BASED_PAUSE_EXITING VMCS_CONTROL_BIT(PAUSE_EXITING) +#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS VMCS_CONTROL_BIT(SEC_CONTROLS) #define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 /* * Definitions of Secondary Processor-Based VM-Execution Controls. 
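A worked expansion of the VMCS_CONTROL_BIT() scheme introduced above, confirming the symbolic form reproduces the removed literal (the feature bit numbers come from vmxfeatures.h, added later in this patch):

	/* VMX_FEATURE_HLT_EXITING is word 1, bit 7: (1*32 + 7) & 0x1f == 7 */
	BUILD_BUG_ON(CPU_BASED_HLT_EXITING != 0x00000080);	/* BIT(7) */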
*/ -#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 -#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 -#define SECONDARY_EXEC_DESC 0x00000004 -#define SECONDARY_EXEC_RDTSCP 0x00000008 -#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 -#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 -#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 -#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 -#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 -#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 -#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 -#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800 -#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 -#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 -#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 -#define SECONDARY_EXEC_ENCLS_EXITING 0x00008000 -#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 -#define SECONDARY_EXEC_ENABLE_PML 0x00020000 -#define SECONDARY_EXEC_PT_CONCEAL_VMX 0x00080000 -#define SECONDARY_EXEC_XSAVES 0x00100000 -#define SECONDARY_EXEC_PT_USE_GPA 0x01000000 -#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x00400000 -#define SECONDARY_EXEC_TSC_SCALING 0x02000000 - -#define PIN_BASED_EXT_INTR_MASK 0x00000001 -#define PIN_BASED_NMI_EXITING 0x00000008 -#define PIN_BASED_VIRTUAL_NMIS 0x00000020 -#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 -#define PIN_BASED_POSTED_INTR 0x00000080 +#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES VMCS_CONTROL_BIT(VIRT_APIC_ACCESSES) +#define SECONDARY_EXEC_ENABLE_EPT VMCS_CONTROL_BIT(EPT) +#define SECONDARY_EXEC_DESC VMCS_CONTROL_BIT(DESC_EXITING) +#define SECONDARY_EXEC_ENABLE_RDTSCP VMCS_CONTROL_BIT(RDTSCP) +#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE VMCS_CONTROL_BIT(VIRTUAL_X2APIC) +#define SECONDARY_EXEC_ENABLE_VPID VMCS_CONTROL_BIT(VPID) +#define SECONDARY_EXEC_WBINVD_EXITING VMCS_CONTROL_BIT(WBINVD_EXITING) +#define SECONDARY_EXEC_UNRESTRICTED_GUEST VMCS_CONTROL_BIT(UNRESTRICTED_GUEST) +#define SECONDARY_EXEC_APIC_REGISTER_VIRT VMCS_CONTROL_BIT(APIC_REGISTER_VIRT) +#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY VMCS_CONTROL_BIT(VIRT_INTR_DELIVERY) +#define SECONDARY_EXEC_PAUSE_LOOP_EXITING VMCS_CONTROL_BIT(PAUSE_LOOP_EXITING) +#define SECONDARY_EXEC_RDRAND_EXITING VMCS_CONTROL_BIT(RDRAND_EXITING) +#define SECONDARY_EXEC_ENABLE_INVPCID VMCS_CONTROL_BIT(INVPCID) +#define SECONDARY_EXEC_ENABLE_VMFUNC VMCS_CONTROL_BIT(VMFUNC) +#define SECONDARY_EXEC_SHADOW_VMCS VMCS_CONTROL_BIT(SHADOW_VMCS) +#define SECONDARY_EXEC_ENCLS_EXITING VMCS_CONTROL_BIT(ENCLS_EXITING) +#define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING) +#define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING) +#define SECONDARY_EXEC_EPT_VIOLATION_VE VMCS_CONTROL_BIT(EPT_VIOLATION_VE) +#define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX) +#define SECONDARY_EXEC_ENABLE_XSAVES VMCS_CONTROL_BIT(XSAVES) +#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC) +#define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA) +#define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING) +#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE VMCS_CONTROL_BIT(USR_WAIT_PAUSE) +#define SECONDARY_EXEC_BUS_LOCK_DETECTION VMCS_CONTROL_BIT(BUS_LOCK_DETECTION) +#define SECONDARY_EXEC_NOTIFY_VM_EXITING VMCS_CONTROL_BIT(NOTIFY_VM_EXITING) + +/* + * Definitions of Tertiary Processor-Based VM-Execution Controls. 
+ */ +#define TERTIARY_EXEC_IPI_VIRT VMCS_CONTROL_BIT(IPI_VIRT) + +#define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING) +#define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING) +#define PIN_BASED_VIRTUAL_NMIS VMCS_CONTROL_BIT(VIRTUAL_NMIS) +#define PIN_BASED_VMX_PREEMPTION_TIMER VMCS_CONTROL_BIT(PREEMPTION_TIMER) +#define PIN_BASED_POSTED_INTR VMCS_CONTROL_BIT(POSTED_INTR) #define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 @@ -103,6 +106,7 @@ #define VM_EXIT_CLEAR_BNDCFGS 0x00800000 #define VM_EXIT_PT_CONCEAL_PIP 0x01000000 #define VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000 +#define VM_EXIT_LOAD_CET_STATE 0x10000000 #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff @@ -116,18 +120,22 @@ #define VM_ENTRY_LOAD_BNDCFGS 0x00010000 #define VM_ENTRY_PT_CONCEAL_PIP 0x00020000 #define VM_ENTRY_LOAD_IA32_RTIT_CTL 0x00040000 +#define VM_ENTRY_LOAD_CET_STATE 0x00100000 #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff -#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f -#define VMX_MISC_SAVE_EFER_LMA 0x00000020 -#define VMX_MISC_ACTIVITY_HLT 0x00000040 -#define VMX_MISC_ZERO_LEN_INS 0x40000000 - /* VMFUNC functions */ -#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001 +#define VMFUNC_CONTROL_BIT(x) BIT((VMX_FEATURE_##x & 0x1f) - 28) + +#define VMX_VMFUNC_EPTP_SWITCHING VMFUNC_CONTROL_BIT(EPTP_SWITCHING) #define VMFUNC_EPTP_ENTRIES 512 +#define VMX_BASIC_32BIT_PHYS_ADDR_ONLY BIT_ULL(48) +#define VMX_BASIC_DUAL_MONITOR_TREATMENT BIT_ULL(49) +#define VMX_BASIC_INOUT BIT_ULL(54) +#define VMX_BASIC_TRUE_CTLS BIT_ULL(55) +#define VMX_BASIC_NO_HW_ERROR_CODE_CC BIT_ULL(56) + static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) { return vmx_basic & GENMASK_ULL(30, 0); @@ -138,9 +146,30 @@ static inline u32 vmx_basic_vmcs_size(u64 vmx_basic) return (vmx_basic & GENMASK_ULL(44, 32)) >> 32; } +static inline u32 vmx_basic_vmcs_mem_type(u64 vmx_basic) +{ + return (vmx_basic & GENMASK_ULL(53, 50)) >> 50; +} + +static inline u64 vmx_basic_encode_vmcs_info(u32 revision, u16 size, u8 memtype) +{ + return revision | ((u64)size << 32) | ((u64)memtype << 50); +} + +#define VMX_MISC_SAVE_EFER_LMA BIT_ULL(5) +#define VMX_MISC_ACTIVITY_HLT BIT_ULL(6) +#define VMX_MISC_ACTIVITY_SHUTDOWN BIT_ULL(7) +#define VMX_MISC_ACTIVITY_WAIT_SIPI BIT_ULL(8) +#define VMX_MISC_INTEL_PT BIT_ULL(14) +#define VMX_MISC_RDMSR_IN_SMM BIT_ULL(15) +#define VMX_MISC_VMXOFF_BLOCK_SMI BIT_ULL(28) +#define VMX_MISC_VMWRITE_SHADOW_RO_FIELDS BIT_ULL(29) +#define VMX_MISC_ZERO_LEN_INS BIT_ULL(30) +#define VMX_MISC_MSR_LIST_MULTIPLIER 512 + static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc) { - return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; + return vmx_misc & GENMASK_ULL(4, 0); } static inline int vmx_misc_cr3_count(u64 vmx_misc) @@ -162,6 +191,7 @@ static inline int vmx_misc_mseg_revid(u64 vmx_misc) enum vmcs_field { VIRTUAL_PROCESSOR_ID = 0x00000000, POSTED_INTR_NV = 0x00000002, + LAST_PID_POINTER_INDEX = 0x00000008, GUEST_ES_SELECTOR = 0x00000800, GUEST_CS_SELECTOR = 0x00000802, GUEST_SS_SELECTOR = 0x00000804, @@ -219,12 +249,19 @@ enum vmcs_field { VMREAD_BITMAP_HIGH = 0x00002027, VMWRITE_BITMAP = 0x00002028, VMWRITE_BITMAP_HIGH = 0x00002029, + VE_INFORMATION_ADDRESS = 0x0000202A, + VE_INFORMATION_ADDRESS_HIGH = 0x0000202B, XSS_EXIT_BITMAP = 0x0000202C, XSS_EXIT_BITMAP_HIGH = 0x0000202D, ENCLS_EXITING_BITMAP = 0x0000202E, ENCLS_EXITING_BITMAP_HIGH = 0x0000202F, TSC_MULTIPLIER = 0x00002032, TSC_MULTIPLIER_HIGH = 0x00002033, + TERTIARY_VM_EXEC_CONTROL = 0x00002034, + TERTIARY_VM_EXEC_CONTROL_HIGH = 
0x00002035, + SHARED_EPT_POINTER = 0x0000203C, + PID_POINTER_TABLE = 0x00002042, + PID_POINTER_TABLE_HIGH = 0x00002043, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, @@ -273,6 +310,7 @@ enum vmcs_field { SECONDARY_VM_EXEC_CONTROL = 0x0000401e, PLE_GAP = 0x00004020, PLE_WINDOW = 0x00004022, + NOTIFY_WINDOW = 0x00004024, VM_INSTRUCTION_ERROR = 0x00004400, VM_EXIT_REASON = 0x00004402, VM_EXIT_INTR_INFO = 0x00004404, @@ -300,7 +338,7 @@ enum vmcs_field { GUEST_LDTR_AR_BYTES = 0x00004820, GUEST_TR_AR_BYTES = 0x00004822, GUEST_INTERRUPTIBILITY_INFO = 0x00004824, - GUEST_ACTIVITY_STATE = 0X00004826, + GUEST_ACTIVITY_STATE = 0x00004826, GUEST_SYSENTER_CS = 0x0000482A, VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, HOST_IA32_SYSENTER_CS = 0x00004c00, @@ -334,6 +372,9 @@ enum vmcs_field { GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, GUEST_SYSENTER_ESP = 0x00006824, GUEST_SYSENTER_EIP = 0x00006826, + GUEST_S_CET = 0x00006828, + GUEST_SSP = 0x0000682a, + GUEST_INTR_SSP_TABLE = 0x0000682c, HOST_CR0 = 0x00006c00, HOST_CR3 = 0x00006c02, HOST_CR4 = 0x00006c04, @@ -346,6 +387,9 @@ enum vmcs_field { HOST_IA32_SYSENTER_EIP = 0x00006c12, HOST_RSP = 0x00006c14, HOST_RIP = 0x00006c16, + HOST_S_CET = 0x00006c18, + HOST_SSP = 0x00006c1a, + HOST_INTR_SSP_TABLE = 0x00006c1c }; /* @@ -363,20 +407,21 @@ enum vmcs_field { #define VECTORING_INFO_DELIVER_CODE_MASK INTR_INFO_DELIVER_CODE_MASK #define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK -#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ -#define INTR_TYPE_RESERVED (1 << 8) /* reserved */ -#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ -#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */ -#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ -#define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */ -#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */ -#define INTR_TYPE_OTHER_EVENT (7 << 8) /* other event */ +#define INTR_TYPE_EXT_INTR (EVENT_TYPE_EXTINT << 8) /* external interrupt */ +#define INTR_TYPE_RESERVED (EVENT_TYPE_RESERVED << 8) /* reserved */ +#define INTR_TYPE_NMI_INTR (EVENT_TYPE_NMI << 8) /* NMI */ +#define INTR_TYPE_HARD_EXCEPTION (EVENT_TYPE_HWEXC << 8) /* processor exception */ +#define INTR_TYPE_SOFT_INTR (EVENT_TYPE_SWINT << 8) /* software interrupt */ +#define INTR_TYPE_PRIV_SW_EXCEPTION (EVENT_TYPE_PRIV_SWEXC << 8) /* ICE breakpoint */ +#define INTR_TYPE_SOFT_EXCEPTION (EVENT_TYPE_SWEXC << 8) /* software exception */ +#define INTR_TYPE_OTHER_EVENT (EVENT_TYPE_OTHER << 8) /* other event */ /* GUEST_INTERRUPTIBILITY_INFO flags. */ #define GUEST_INTR_STATE_STI 0x00000001 #define GUEST_INTR_STATE_MOV_SS 0x00000002 #define GUEST_INTR_STATE_SMI 0x00000004 #define GUEST_INTR_STATE_NMI 0x00000008 +#define GUEST_INTR_STATE_ENCLAVE_INTR 0x00000010 /* GUEST_ACTIVITY_STATE flags */ #define GUEST_ACTIVITY_ACTIVE 0 @@ -492,20 +537,34 @@ enum vmcs_field { #define VMX_EPTP_PWL_4 0x18ull #define VMX_EPTP_PWL_5 0x20ull #define VMX_EPTP_AD_ENABLE_BIT (1ull << 6) +/* The EPTP memtype is encoded in bits 2:0, i.e. doesn't need to be shifted. 
*/ #define VMX_EPTP_MT_MASK 0x7ull -#define VMX_EPTP_MT_WB 0x6ull -#define VMX_EPTP_MT_UC 0x0ull +#define VMX_EPTP_MT_WB X86_MEMTYPE_WB +#define VMX_EPTP_MT_UC X86_MEMTYPE_UC #define VMX_EPT_READABLE_MASK 0x1ull #define VMX_EPT_WRITABLE_MASK 0x2ull #define VMX_EPT_EXECUTABLE_MASK 0x4ull #define VMX_EPT_IPAT_BIT (1ull << 6) #define VMX_EPT_ACCESS_BIT (1ull << 8) #define VMX_EPT_DIRTY_BIT (1ull << 9) +#define VMX_EPT_SUPPRESS_VE_BIT (1ull << 63) #define VMX_EPT_RWX_MASK (VMX_EPT_READABLE_MASK | \ VMX_EPT_WRITABLE_MASK | \ VMX_EPT_EXECUTABLE_MASK) #define VMX_EPT_MT_MASK (7ull << VMX_EPT_MT_EPTE_SHIFT) +static inline u8 vmx_eptp_page_walk_level(u64 eptp) +{ + u64 encoded_level = eptp & VMX_EPTP_PWL_MASK; + + if (encoded_level == VMX_EPTP_PWL_5) + return 5; + + /* @eptp must be pre-validated by the caller. */ + WARN_ON_ONCE(encoded_level != VMX_EPTP_PWL_4); + return 4; +} + /* The mask to use to trigger an EPT Misconfiguration in order to track MMIO */ #define VMX_EPT_MISCONFIG_WX_VALUE (VMX_EPT_WRITABLE_MASK | \ VMX_EPT_EXECUTABLE_MASK) @@ -521,28 +580,38 @@ struct vmx_msr_entry { /* * Exit Qualifications for entry failure during or after loading guest state */ -#define ENTRY_FAIL_DEFAULT 0 -#define ENTRY_FAIL_PDPTE 2 -#define ENTRY_FAIL_NMI 3 -#define ENTRY_FAIL_VMCS_LINK_PTR 4 +enum vm_entry_failure_code { + ENTRY_FAIL_DEFAULT = 0, + ENTRY_FAIL_PDPTE = 2, + ENTRY_FAIL_NMI = 3, + ENTRY_FAIL_VMCS_LINK_PTR = 4, +}; /* * Exit Qualifications for EPT Violations */ -#define EPT_VIOLATION_ACC_READ_BIT 0 -#define EPT_VIOLATION_ACC_WRITE_BIT 1 -#define EPT_VIOLATION_ACC_INSTR_BIT 2 -#define EPT_VIOLATION_READABLE_BIT 3 -#define EPT_VIOLATION_WRITABLE_BIT 4 -#define EPT_VIOLATION_EXECUTABLE_BIT 5 -#define EPT_VIOLATION_GVA_TRANSLATED_BIT 8 -#define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT) -#define EPT_VIOLATION_ACC_WRITE (1 << EPT_VIOLATION_ACC_WRITE_BIT) -#define EPT_VIOLATION_ACC_INSTR (1 << EPT_VIOLATION_ACC_INSTR_BIT) -#define EPT_VIOLATION_READABLE (1 << EPT_VIOLATION_READABLE_BIT) -#define EPT_VIOLATION_WRITABLE (1 << EPT_VIOLATION_WRITABLE_BIT) -#define EPT_VIOLATION_EXECUTABLE (1 << EPT_VIOLATION_EXECUTABLE_BIT) -#define EPT_VIOLATION_GVA_TRANSLATED (1 << EPT_VIOLATION_GVA_TRANSLATED_BIT) +#define EPT_VIOLATION_ACC_READ BIT(0) +#define EPT_VIOLATION_ACC_WRITE BIT(1) +#define EPT_VIOLATION_ACC_INSTR BIT(2) +#define EPT_VIOLATION_PROT_READ BIT(3) +#define EPT_VIOLATION_PROT_WRITE BIT(4) +#define EPT_VIOLATION_PROT_EXEC BIT(5) +#define EPT_VIOLATION_EXEC_FOR_RING3_LIN BIT(6) +#define EPT_VIOLATION_PROT_MASK (EPT_VIOLATION_PROT_READ | \ + EPT_VIOLATION_PROT_WRITE | \ + EPT_VIOLATION_PROT_EXEC) +#define EPT_VIOLATION_GVA_IS_VALID BIT(7) +#define EPT_VIOLATION_GVA_TRANSLATED BIT(8) + +#define EPT_VIOLATION_RWX_TO_PROT(__epte) (((__epte) & VMX_EPT_RWX_MASK) << 3) + +static_assert(EPT_VIOLATION_RWX_TO_PROT(VMX_EPT_RWX_MASK) == + (EPT_VIOLATION_PROT_READ | EPT_VIOLATION_PROT_WRITE | EPT_VIOLATION_PROT_EXEC)); + +/* + * Exit Qualifications for NOTIFY VM EXIT + */ +#define NOTIFY_VM_CONTEXT_INVALID BIT(0) /* * VM-instruction error numbers @@ -575,6 +644,20 @@ enum vm_instruction_error_number { VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID = 28, }; +/* + * VM-instruction errors that can be encountered on VM-Enter, used to trace + * nested VM-Enter failures reported by hardware. Errors unique to VM-Enter + * from a SMI Transfer Monitor are not included as things have gone seriously + * sideways if we get one of those... 
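A composition sketch for an EPTP value from the fields above (root_hpa is an assumed page-aligned EPT root; real code additionally consults the VMX capability MSRs before picking the walk length and memtype):

	u64 eptp = root_hpa | VMX_EPTP_MT_WB | VMX_EPTP_PWL_4 |
		   VMX_EPTP_AD_ENABLE_BIT;

	WARN_ON_ONCE(vmx_eptp_page_walk_level(eptp) != 4);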
+ */ +#define VMX_VMENTER_INSTRUCTION_ERRORS \ + { VMXERR_VMLAUNCH_NONCLEAR_VMCS, "VMLAUNCH_NONCLEAR_VMCS" }, \ + { VMXERR_VMRESUME_NONLAUNCHED_VMCS, "VMRESUME_NONLAUNCHED_VMCS" }, \ + { VMXERR_VMRESUME_AFTER_VMXOFF, "VMRESUME_AFTER_VMXOFF" }, \ + { VMXERR_ENTRY_INVALID_CONTROL_FIELD, "VMENTRY_INVALID_CONTROL_FIELD" }, \ + { VMXERR_ENTRY_INVALID_HOST_STATE_FIELD, "VMENTRY_INVALID_HOST_STATE_FIELD" }, \ + { VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS, "VMENTRY_EVENTS_BLOCKED_BY_MOV_SS" } + enum vmx_l1d_flush_state { VMENTER_L1D_FLUSH_AUTO, VMENTER_L1D_FLUSH_NEVER, @@ -586,4 +669,13 @@ enum vmx_l1d_flush_state { extern enum vmx_l1d_flush_state l1tf_vmx_mitigation; +struct vmx_ve_information { + u32 exit_reason; + u32 delivery; + u64 exit_qualification; + u64 guest_linear_address; + u64 guest_physical_address; + u16 eptp_index; +}; + #endif diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h new file mode 100644 index 000000000000..09b1d7e607c1 --- /dev/null +++ b/arch/x86/include/asm/vmxfeatures.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_VMXFEATURES_H +#define _ASM_X86_VMXFEATURES_H + +/* + * Defines VMX CPU feature bits + */ +#define NVMXINTS 5 /* N 32-bit words worth of info */ + +/* + * Note: If the comment begins with a quoted string, that string is used + * in /proc/cpuinfo instead of the macro name. Otherwise, this feature bit + * is not displayed in /proc/cpuinfo at all. + */ + +/* Pin-Based VM-Execution Controls, EPT/VPID, APIC and VM-Functions, word 0 */ +#define VMX_FEATURE_INTR_EXITING ( 0*32+ 0) /* VM-Exit on vectored interrupts */ +#define VMX_FEATURE_NMI_EXITING ( 0*32+ 3) /* VM-Exit on NMIs */ +#define VMX_FEATURE_VIRTUAL_NMIS ( 0*32+ 5) /* "vnmi" NMI virtualization */ +#define VMX_FEATURE_PREEMPTION_TIMER ( 0*32+ 6) /* "preemption_timer" VMX Preemption Timer */ +#define VMX_FEATURE_POSTED_INTR ( 0*32+ 7) /* "posted_intr" Posted Interrupts */ + +/* EPT/VPID features, scattered to bits 16-23 */ +#define VMX_FEATURE_INVVPID ( 0*32+ 16) /* "invvpid" INVVPID is supported */ +#define VMX_FEATURE_EPT_EXECUTE_ONLY ( 0*32+ 17) /* "ept_x_only" EPT entries can be execute only */ +#define VMX_FEATURE_EPT_AD ( 0*32+ 18) /* "ept_ad" EPT Accessed/Dirty bits */ +#define VMX_FEATURE_EPT_1GB ( 0*32+ 19) /* "ept_1gb" 1GB EPT pages */ +#define VMX_FEATURE_EPT_5LEVEL ( 0*32+ 20) /* "ept_5level" 5-level EPT paging */ + +/* Aggregated APIC features 24-27 */ +#define VMX_FEATURE_FLEXPRIORITY ( 0*32+ 24) /* "flexpriority" TPR shadow + virt APIC */ +#define VMX_FEATURE_APICV ( 0*32+ 25) /* "apicv" TPR shadow + APIC reg virt + virt intr delivery + posted interrupts */ + +/* VM-Functions, shifted to bits 28-31 */ +#define VMX_FEATURE_EPTP_SWITCHING ( 0*32+ 28) /* "eptp_switching" EPTP switching (in guest) */ + +/* Primary Processor-Based VM-Execution Controls, word 1 */ +#define VMX_FEATURE_INTR_WINDOW_EXITING ( 1*32+ 2) /* VM-Exit if INTRs are unblocked in guest */ +#define VMX_FEATURE_USE_TSC_OFFSETTING ( 1*32+ 3) /* "tsc_offset" Offset hardware TSC when read in guest */ +#define VMX_FEATURE_HLT_EXITING ( 1*32+ 7) /* VM-Exit on HLT */ +#define VMX_FEATURE_INVLPG_EXITING ( 1*32+ 9) /* VM-Exit on INVLPG */ +#define VMX_FEATURE_MWAIT_EXITING ( 1*32+ 10) /* VM-Exit on MWAIT */ +#define VMX_FEATURE_RDPMC_EXITING ( 1*32+ 11) /* VM-Exit on RDPMC */ +#define VMX_FEATURE_RDTSC_EXITING ( 1*32+ 12) /* VM-Exit on RDTSC */ +#define VMX_FEATURE_CR3_LOAD_EXITING ( 1*32+ 15) /* VM-Exit on writes to CR3 */ +#define VMX_FEATURE_CR3_STORE_EXITING ( 1*32+ 16) 
/* VM-Exit on reads from CR3 */ +#define VMX_FEATURE_TERTIARY_CONTROLS ( 1*32+ 17) /* Enable Tertiary VM-Execution Controls */ +#define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* VM-Exit on writes to CR8 */ +#define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* VM-Exit on reads from CR8 */ +#define VMX_FEATURE_VIRTUAL_TPR ( 1*32+ 21) /* "vtpr" TPR virtualization, a.k.a. TPR shadow */ +#define VMX_FEATURE_NMI_WINDOW_EXITING ( 1*32+ 22) /* VM-Exit if NMIs are unblocked in guest */ +#define VMX_FEATURE_MOV_DR_EXITING ( 1*32+ 23) /* VM-Exit on accesses to debug registers */ +#define VMX_FEATURE_UNCOND_IO_EXITING ( 1*32+ 24) /* VM-Exit on *all* IN{S} and OUT{S}*/ +#define VMX_FEATURE_USE_IO_BITMAPS ( 1*32+ 25) /* VM-Exit based on I/O port */ +#define VMX_FEATURE_MONITOR_TRAP_FLAG ( 1*32+ 27) /* "mtf" VMX single-step VM-Exits */ +#define VMX_FEATURE_USE_MSR_BITMAPS ( 1*32+ 28) /* VM-Exit based on MSR index */ +#define VMX_FEATURE_MONITOR_EXITING ( 1*32+ 29) /* VM-Exit on MONITOR (MWAIT's accomplice) */ +#define VMX_FEATURE_PAUSE_EXITING ( 1*32+ 30) /* VM-Exit on PAUSE (unconditionally) */ +#define VMX_FEATURE_SEC_CONTROLS ( 1*32+ 31) /* Enable Secondary VM-Execution Controls */ + +/* Secondary Processor-Based VM-Execution Controls, word 2 */ +#define VMX_FEATURE_VIRT_APIC_ACCESSES ( 2*32+ 0) /* "vapic" Virtualize memory mapped APIC accesses */ +#define VMX_FEATURE_EPT ( 2*32+ 1) /* "ept" Extended Page Tables, a.k.a. Two-Dimensional Paging */ +#define VMX_FEATURE_DESC_EXITING ( 2*32+ 2) /* VM-Exit on {S,L}*DT instructions */ +#define VMX_FEATURE_RDTSCP ( 2*32+ 3) /* Enable RDTSCP in guest */ +#define VMX_FEATURE_VIRTUAL_X2APIC ( 2*32+ 4) /* Virtualize X2APIC for the guest */ +#define VMX_FEATURE_VPID ( 2*32+ 5) /* "vpid" Virtual Processor ID (TLB ASID modifier) */ +#define VMX_FEATURE_WBINVD_EXITING ( 2*32+ 6) /* VM-Exit on WBINVD */ +#define VMX_FEATURE_UNRESTRICTED_GUEST ( 2*32+ 7) /* "unrestricted_guest" Allow Big Real Mode and other "invalid" states */ +#define VMX_FEATURE_APIC_REGISTER_VIRT ( 2*32+ 8) /* "vapic_reg" Hardware emulation of reads to the virtual-APIC */ +#define VMX_FEATURE_VIRT_INTR_DELIVERY ( 2*32+ 9) /* "vid" Evaluation and delivery of pending virtual interrupts */ +#define VMX_FEATURE_PAUSE_LOOP_EXITING ( 2*32+ 10) /* "ple" Conditionally VM-Exit on PAUSE at CPL0 */ +#define VMX_FEATURE_RDRAND_EXITING ( 2*32+ 11) /* VM-Exit on RDRAND*/ +#define VMX_FEATURE_INVPCID ( 2*32+ 12) /* Enable INVPCID in guest */ +#define VMX_FEATURE_VMFUNC ( 2*32+ 13) /* Enable VM-Functions (leaf dependent) */ +#define VMX_FEATURE_SHADOW_VMCS ( 2*32+ 14) /* "shadow_vmcs" VMREAD/VMWRITE in guest can access shadow VMCS */ +#define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* VM-Exit on ENCLS (leaf dependent) */ +#define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* VM-Exit on RDSEED */ +#define VMX_FEATURE_PAGE_MOD_LOGGING ( 2*32+ 17) /* "pml" Log dirty pages into buffer */ +#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* "ept_violation_ve" Conditionally reflect EPT violations as #VE exceptions */ +#define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* Suppress VMX indicators in Processor Trace */ +#define VMX_FEATURE_XSAVES ( 2*32+ 20) /* Enable XSAVES and XRSTORS in guest */ +#define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. 
user */
+#define VMX_FEATURE_PT_USE_GPA		( 2*32+ 24) /* Processor Trace logs GPAs */
+#define VMX_FEATURE_TSC_SCALING		( 2*32+ 25) /* "tsc_scaling" Scale hardware TSC when read in guest */
+#define VMX_FEATURE_USR_WAIT_PAUSE	( 2*32+ 26) /* "usr_wait_pause" Enable TPAUSE, UMONITOR, UMWAIT in guest */
+#define VMX_FEATURE_ENCLV_EXITING	( 2*32+ 28) /* VM-Exit on ENCLV (leaf dependent) */
+#define VMX_FEATURE_BUS_LOCK_DETECTION	( 2*32+ 30) /* VM-Exit when bus lock caused */
+#define VMX_FEATURE_NOTIFY_VM_EXITING	( 2*32+ 31) /* "notify_vm_exiting" VM-Exit when no event windows after notify window */
+
+/* Tertiary Processor-Based VM-Execution Controls, word 3 */
+#define VMX_FEATURE_IPI_VIRT		( 3*32+ 4) /* "ipi_virt" Enable IPI virtualization */
+#endif /* _ASM_X86_VMXFEATURES_H */
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index b986b2ca688a..472f0263dbc6 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -4,6 +4,7 @@
 #include <linux/seqlock.h>
 #include <uapi/asm/vsyscall.h>
+#include <asm/page_types.h>
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
 extern void map_vsyscall(void);
@@ -13,13 +14,24 @@ extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
 * Called on instruction fetch fault in vsyscall page.
 * Returns true if handled.
 */
-extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
+extern bool emulate_vsyscall(unsigned long error_code,
+			     struct pt_regs *regs, unsigned long address);
 #else
 static inline void map_vsyscall(void) {}
-static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
+static inline bool emulate_vsyscall(unsigned long error_code,
+				    struct pt_regs *regs, unsigned long address)
 {
 	return false;
 }
 #endif
+/*
+ * The (legacy) vsyscall page is the lone page in the kernel portion
+ * of the address space that has user-accessible permissions.
+ */
+static inline bool is_vsyscall_vaddr(unsigned long vaddr)
+{
+	return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR);
+}
+
 #endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
deleted file mode 100644
index 3f32dfc2ab73..000000000000
--- a/arch/x86/include/asm/vvar.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * vvar.h: Shared vDSO/kernel variable declarations
- * Copyright (c) 2011 Andy Lutomirski
- * Subject to the GNU General Public License, version 2
- *
- * A handful of variables are accessible (read-only) from userspace
- * code in the vsyscall page and the vdso. They are declared here.
- * Some other file must define them with DEFINE_VVAR.
- *
- * In normal kernel code, they are used like any other variable.
- * In user code, they are accessed through the VVAR macro.
- *
- * These variables live in a page of kernel data that has an extra RO
- * mapping for userspace. Each variable needs a unique offset within
- * that page; specify that offset with the DECLARE_VVAR macro. (If
- * you mess up, the linker will catch it.)
- */
-
-#ifndef _ASM_X86_VVAR_H
-#define _ASM_X86_VVAR_H
-
-#if defined(__VVAR_KERNEL_LDS)
-
-/* The kernel linker script defines its own magic to put vvars in the
- * right place.
- */ -#define DECLARE_VVAR(offset, type, name) \ - EMIT_VVAR(name, offset) - -#else - -extern char __vvar_page; - -#define DECLARE_VVAR(offset, type, name) \ - extern type vvar_ ## name __attribute__((visibility("hidden"))); - -#define VVAR(name) (vvar_ ## name) - -#define DEFINE_VVAR(type, name) \ - type name \ - __attribute__((section(".vvar_" #name), aligned(16))) __visible - -#endif - -/* DECLARE_VVAR(offset, type, name) */ - -DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) - -#undef DECLARE_VVAR - -#endif diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h index 06006b0351f3..422a47746657 100644 --- a/arch/x86/include/asm/word-at-a-time.h +++ b/arch/x86/include/asm/word-at-a-time.h @@ -2,47 +2,15 @@ #ifndef _ASM_WORD_AT_A_TIME_H #define _ASM_WORD_AT_A_TIME_H -#include <linux/kernel.h> +#include <linux/bitops.h> +#include <linux/wordpart.h> -/* - * This is largely generic for little-endian machines, but the - * optimal byte mask counting is probably going to be something - * that is architecture-specific. If you have a reliably fast - * bit count instruction, that might be better than the multiply - * and shift, for example. - */ struct word_at_a_time { const unsigned long one_bits, high_bits; }; #define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } -#ifdef CONFIG_64BIT - -/* - * Jan Achrenius on G+: microoptimized version of - * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56" - * that works for the bytemasks without having to - * mask them first. - */ -static inline long count_masked_bytes(unsigned long mask) -{ - return mask*0x0001020304050608ul >> 56; -} - -#else /* 32-bit case */ - -/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ -static inline long count_masked_bytes(long mask) -{ - /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ - long a = (0x0ff0001+mask) >> 23; - /* Fix the 1 for 00 case */ - return a & mask; -} - -#endif - /* Return nonzero if it has a zero */ static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) { @@ -56,6 +24,22 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, return bits; } +#ifdef CONFIG_64BIT + +/* Keep the initial has_zero() value for both bitmask and size calc */ +#define create_zero_mask(bits) (bits) + +static inline unsigned long zero_bytemask(unsigned long bits) +{ + bits = (bits - 1) & ~bits; + return bits >> 7; +} + +#define find_zero(bits) (__ffs(bits) >> 3) + +#else + +/* Create the final mask for both bytemask and size */ static inline unsigned long create_zero_mask(unsigned long bits) { bits = (bits - 1) & ~bits; @@ -65,11 +49,17 @@ static inline unsigned long create_zero_mask(unsigned long bits) /* The mask we created is directly usable as a bytemask */ #define zero_bytemask(mask) (mask) +/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ static inline unsigned long find_zero(unsigned long mask) { - return count_masked_bytes(mask); + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + long a = (0x0ff0001+mask) >> 23; + /* Fix the 1 for 00 case */ + return a & mask; } +#endif + /* * Load an unaligned word from kernel space. 
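 *
 * (Editor's sketch, not part of the patch.) The removed .fixup sequence
 * below and its EX_TYPE_ZEROPAD replacement perform the same recovery
 * when the load crosses into an unmapped page; in C terms, roughly:
 *
 *	offset	= (unsigned long)addr & (sizeof(long) - 1);
 *	aligned	= (unsigned long)addr & ~(sizeof(long) - 1);
 *	ret	= *(unsigned long *)aligned >> (offset * 8);
 *
 * i.e. redo the load from the aligned address (which is known to be
 * mapped) and shift out the bytes below addr, zero-filling from the top.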
* @@ -79,27 +69,15 @@ static inline unsigned long find_zero(unsigned long mask) */ static inline unsigned long load_unaligned_zeropad(const void *addr) { - unsigned long ret, dummy; + unsigned long ret; - asm( - "1:\tmov %2,%0\n" + asm volatile( + "1: mov %[mem], %[ret]\n" "2:\n" - ".section .fixup,\"ax\"\n" - "3:\t" - "lea %2,%1\n\t" - "and %3,%1\n\t" - "mov (%1),%0\n\t" - "leal %2,%%ecx\n\t" - "andl %4,%%ecx\n\t" - "shll $3,%%ecx\n\t" - "shr %%cl,%0\n\t" - "jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - :"=&r" (ret),"=&c" (dummy) - :"m" (*(unsigned long *)addr), - "i" (-sizeof(unsigned long)), - "i" (sizeof(unsigned long)-1)); + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_ZEROPAD) + : [ret] "=r" (ret) + : [mem] "m" (*(unsigned long *)addr)); + return ret; } diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index b85a7c54c6a1..6c8a6ead84f6 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -2,33 +2,26 @@ #ifndef _ASM_X86_PLATFORM_H #define _ASM_X86_PLATFORM_H -#include <asm/bootparam.h> - +struct ghcb; struct mpc_bus; struct mpc_cpu; +struct pt_regs; struct mpc_table; struct cpuinfo_x86; +struct irq_domain; /** * struct x86_init_mpparse - platform specific mpparse ops - * @mpc_record: platform specific mpc record accounting * @setup_ioapic_ids: platform specific ioapic id override - * @mpc_apic_id: platform specific mpc apic id assignment - * @smp_read_mpc_oem: platform specific oem mpc table setup - * @mpc_oem_pci_bus: platform specific pci bus setup (default NULL) - * @mpc_oem_bus_info: platform specific mpc bus info - * @find_smp_config: find the smp configuration - * @get_smp_config: get the smp configuration + * @find_mptable: Find MPTABLE early to reserve the memory region + * @early_parse_smp_cfg: Parse the SMP configuration data early before initmem_init() + * @parse_smp_cfg: Parse the SMP configuration data */ struct x86_init_mpparse { - void (*mpc_record)(unsigned int mode); void (*setup_ioapic_ids)(void); - int (*mpc_apic_id)(struct mpc_cpu *m); - void (*smp_read_mpc_oem)(struct mpc_table *mpc); - void (*mpc_oem_pci_bus)(struct mpc_bus *m); - void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); - void (*find_smp_config)(void); - void (*get_smp_config)(unsigned int early); + void (*find_mptable)(void); + void (*early_parse_smp_cfg)(void); + void (*parse_smp_cfg)(void); }; /** @@ -37,12 +30,13 @@ struct x86_init_mpparse { * @reserve_resources: reserve the standard resources for the * platform * @memory_setup: platform specific memory setup - * + * @dmi_setup: platform specific DMI setup */ struct x86_init_resources { void (*probe_roms)(void); void (*reserve_resources)(void); char *(*memory_setup)(void); + void (*dmi_setup)(void); }; /** @@ -50,14 +44,16 @@ struct x86_init_resources { * @pre_vector_init: init code to run before interrupt vectors * are set up. 
 * @intr_init:			interrupt init code
- * @trap_init:			platform specific trap setup
+ * @intr_mode_select:		interrupt delivery mode selection
 * @intr_mode_init:		interrupt delivery mode setup
+ * @create_pci_msi_domain:	Create the PCI/MSI interrupt domain
 */
struct x86_init_irqs {
	void (*pre_vector_init)(void);
	void (*intr_init)(void);
-	void (*trap_init)(void);
+	void (*intr_mode_select)(void);
	void (*intr_mode_init)(void);
+	struct irq_domain *(*create_pci_msi_domain)(void);
};

/**
@@ -83,7 +79,7 @@ struct x86_init_paging {
/**
 * struct x86_init_timers - platform specific timer setup
- * @setup_perpcu_clockev:	set up the per cpu clock event device for the
+ * @setup_percpu_clockev:	set up the per cpu clock event device for the
 *				boot cpu
 * @timer_init:			initialize the platform timer (default PIT/HPET)
 * @wallclock_init:		init the wallclock device
@@ -121,6 +117,7 @@ struct x86_init_pci {
 * @init_platform:		platform setup
 * @guest_late_init:		guest late init
 * @x2apic_available:		X2APIC detection
+ * @msi_ext_dest_id:		MSI supports 15-bit APIC IDs
 * @init_mem_mapping:		setup early mappings during init_mem_mapping()
 * @init_after_bootmem:		guest init after boot allocator is finished
 */
@@ -128,21 +125,49 @@ struct x86_hyper_init {
	void (*init_platform)(void);
	void (*guest_late_init)(void);
	bool (*x2apic_available)(void);
+	bool (*msi_ext_dest_id)(void);
	void (*init_mem_mapping)(void);
	void (*init_after_bootmem)(void);
};

/**
 * struct x86_init_acpi - x86 ACPI init functions
+ * @set_root_pointer:		set RSDP address
 * @get_root_pointer:		get RSDP address
 * @reduced_hw_early_init:	hardware reduced platform early init
 */
struct x86_init_acpi {
+	void (*set_root_pointer)(u64 addr);
	u64 (*get_root_pointer)(void);
	void (*reduced_hw_early_init)(void);
};

/**
+ * struct x86_guest - Functions used by misc guest incarnations like SEV, TDX, etc.
+ *
+ * @enc_status_change_prepare:	Notify HV before the encryption status of a range is changed
+ * @enc_status_change_finish:	Notify HV after the encryption status of a range is changed
+ * @enc_tlb_flush_required:	Returns true if a TLB flush is needed before changing page encryption status
+ * @enc_cache_flush_required:	Returns true if a cache flush is needed before changing page encryption status
+ * @enc_kexec_begin:		Begin the two-step process of converting shared memory back
+ *				to private. It stops new conversions from being started
+ *				and waits for in-flight conversions to finish, if possible.
+ * @enc_kexec_finish:		Finish the two-step process of converting shared memory to
+ *				private. All memory is private after the call when
+ *				the function returns.
+ *				It is called on only one CPU while the others are shut down
+ *				and with interrupts disabled.
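+ *
+ * (Editor's illustration, not part of the patch.) A caller converting
+ * @npages at @vaddr is expected to use these hooks roughly as follows;
+ * the flush helpers shown stand in for whatever the caller really uses:
+ *
+ *	x86_platform.guest.enc_status_change_prepare(vaddr, npages, enc);
+ *	if (x86_platform.guest.enc_cache_flush_required())
+ *		wbinvd_on_all_cpus();
+ *	if (x86_platform.guest.enc_tlb_flush_required(enc))
+ *		flush_tlb_all();
+ *	... update the page tables ...
+ *	x86_platform.guest.enc_status_change_finish(vaddr, npages, enc);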
+ */ +struct x86_guest { + int (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); + int (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc); + bool (*enc_tlb_flush_required)(bool enc); + bool (*enc_cache_flush_required)(void); + void (*enc_kexec_begin)(void); + void (*enc_kexec_finish)(void); +}; + +/** * struct x86_init_ops - functions for platform specific setup * */ @@ -163,11 +188,14 @@ struct x86_init_ops { * struct x86_cpuinit_ops - platform specific cpu hotplug setups * @setup_percpu_clockev: set up the per cpu clock event device * @early_percpu_clock_init: early init of the per cpu clock event device + * @fixup_cpu_id: fixup function for cpuinfo_x86::topo.pkg_id + * @parallel_bringup: Parallel bringup control */ struct x86_cpuinit_ops { void (*setup_percpu_clockev)(void); void (*early_percpu_clock_init)(void); void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); + bool parallel_bringup; }; struct timespec64; @@ -201,7 +229,7 @@ struct x86_legacy_devices { * given platform/subarch. * @X86_LEGACY_I8042_FIRMWARE_ABSENT: firmware reports that the controller * is absent. - * @X86_LEGACY_i8042_EXPECTED_PRESENT: the controller is likely to be + * @X86_LEGACY_I8042_EXPECTED_PRESENT: the controller is likely to be * present, the i8042 driver should probe for controller existence. */ enum x86_legacy_i8042_state { @@ -216,6 +244,8 @@ enum x86_legacy_i8042_state { * @i8042: indicated if we expect the device to have i8042 controller * present. * @rtc: this device has a CMOS real-time clock present + * @warm_reset: 1 if platform allows warm reset, else 0 + * @no_vga: 1 if (FADT.boot_flags & ACPI_FADT_NO_VGA) is set, else 0 * @reserve_bios_regions: boot code will search for the EBDA address and the * start of the 640k - 1M BIOS region. If false, the platform must * ensure that its memory map correctly reserves sub-1MB regions as needed. @@ -234,10 +264,26 @@ struct x86_legacy_features { /** * struct x86_hyper_runtime - x86 hypervisor specific runtime callbacks * - * @pin_vcpu: pin current vcpu to specified physical cpu (run rarely) + * @pin_vcpu: pin current vcpu to specified physical + * cpu (run rarely) + * @sev_es_hcall_prepare: Load additional hypervisor-specific + * state into the GHCB when doing a VMMCALL under + * SEV-ES. Called from the #VC exception handler. + * @sev_es_hcall_finish: Copies state from the GHCB back into the + * processor (or pt_regs). Also runs checks on the + * state returned from the hypervisor after a + * VMMCALL under SEV-ES. Needs to return 'false' + * if the checks fail. Called from the #VC + * exception handler. + * @is_private_mmio: For CoCo VMs, must map MMIO address as private. + * Used when device is emulated by a paravisor + * layer in the VM context. */ struct x86_hyper_runtime { void (*pin_vcpu)(int cpu); + void (*sev_es_hcall_prepare)(struct ghcb *ghcb, struct pt_regs *regs); + bool (*sev_es_hcall_finish)(struct ghcb *ghcb, struct pt_regs *regs); + bool (*is_private_mmio)(u64 addr); }; /** @@ -246,8 +292,10 @@ struct x86_hyper_runtime { * @calibrate_tsc: calibrate TSC, if different from CPU * @get_wallclock: get time from HW clock like RTC etc. 
 * @set_wallclock:			set time back to HW clock
- * @is_untracked_pat_range		exclude from PAT logic
- * @nmi_init				enable NMI on cpus
+ * @iommu_shutdown:			set by an IOMMU driver for shutdown if necessary
+ * @is_untracked_pat_range:		exclude from PAT logic
+ * @nmi_init:				enable NMI on cpus
+ * @get_nmi_reason:			get the reason an NMI was received
 * @save_sched_clock_state:		save state for sched_clock() on suspend
 * @restore_sched_clock_state:		restore state for sched_clock() on resume
 * @apic_post_init:			adjust apic if needed
@@ -259,7 +307,10 @@ struct x86_hyper_runtime {
 *					possible in x86_early_init_platform_quirks() by
 *					only using the current x86_hardware_subarch
 *					semantics.
+ * @realmode_reserve:			reserve memory for realmode trampoline
+ * @realmode_init:			initialize realmode trampoline
 * @hyper:				x86 hypervisor specific runtime callbacks
+ * @guest:				guest incarnations callbacks
 */
struct x86_platform_ops {
	unsigned long (*calibrate_cpu)(void);
@@ -275,16 +326,10 @@ struct x86_platform_ops {
	void (*apic_post_init)(void);
	struct x86_legacy_features legacy;
	void (*set_legacy_features)(void);
+	void (*realmode_reserve)(void);
+	void (*realmode_init)(void);
	struct x86_hyper_runtime hyper;
-};
-
-struct pci_dev;
-
-struct x86_msi_ops {
-	int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
-	void (*teardown_msi_irq)(unsigned int irq);
-	void (*teardown_msi_irqs)(struct pci_dev *dev);
-	void (*restore_msi_irqs)(struct pci_dev *dev);
+	struct x86_guest guest;
};

struct x86_apic_ops {
@@ -301,6 +346,10 @@ extern struct x86_apic_ops x86_apic_ops;
extern void x86_early_init_platform_quirks(void);
extern void x86_init_noop(void);
extern void x86_init_uint_noop(unsigned int unused);
+extern bool bool_x86_init_noop(void);
+extern void x86_op_int_noop(int cpu);
extern bool x86_pnpbios_disabled(void);
+extern int set_rtc_noop(const struct timespec64 *now);
+extern void get_rtc_noop(struct timespec64 *now);

#endif
diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h
index a9630104f1c4..a3c29b1496c8 100644
--- a/arch/x86/include/asm/xen/cpuid.h
+++ b/arch/x86/include/asm/xen/cpuid.h
@@ -89,17 +89,40 @@
 * Sub-leaf 2: EAX: host tsc frequency in kHz
 */

+#define XEN_CPUID_TSC_EMULATED		(1u << 0)
+#define XEN_CPUID_HOST_TSC_RELIABLE	(1u << 1)
+#define XEN_CPUID_RDTSCP_INSTR_AVAIL	(1u << 2)
+
+#define XEN_CPUID_TSC_MODE_DEFAULT	(0)
+#define XEN_CPUID_TSC_MODE_ALWAYS_EMULATE	(1u)
+#define XEN_CPUID_TSC_MODE_NEVER_EMULATE	(2u)
+#define XEN_CPUID_TSC_MODE_PVRDTSCP	(3u)
+
/*
 * Leaf 5 (0x40000x04)
 * HVM-specific features
 * Sub-leaf 0: EAX: Features
 * Sub-leaf 0: EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag)
+ * Sub-leaf 0: ECX: domain id (iff EAX has XEN_HVM_CPUID_DOMID_PRESENT flag)
 */
#define XEN_HVM_CPUID_APIC_ACCESS_VIRT	(1u << 0) /* Virtualized APIC registers */
#define XEN_HVM_CPUID_X2APIC_VIRT	(1u << 1) /* Virtualized x2APIC accesses */
/* Memory mapped from other domains has valid IOMMU entries */
#define XEN_HVM_CPUID_IOMMU_MAPPINGS	(1u << 2)
#define XEN_HVM_CPUID_VCPU_ID_PRESENT	(1u << 3) /* vcpu id is present in EBX */
+#define XEN_HVM_CPUID_DOMID_PRESENT	(1u << 4) /* domid is present in ECX */
+/*
+ * With interrupt format set to 0 (non-remappable) bits 55:49 from the
+ * IO-APIC RTE and bits 11:5 from the MSI address can be used to store
+ * high bits for the Destination ID. This expands the Destination ID
+ * field from 8 to 15 bits, making it possible to target APIC IDs up to 32768.
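+ *
+ * (Editor's sketch, not part of the patch.) A guest composing a
+ * non-remapped MSI address for a 15-bit destination would roughly do:
+ *
+ *	msi_addr  = 0xfee00000;
+ *	msi_addr |= (dest_id & 0xff) << 12;	destination ID bits 7:0
+ *	msi_addr |= (dest_id >> 8) << 5;	destination ID bits 14:8
+ *
+ * which is the layout the x86_hyper_init msi_ext_dest_id() hook above
+ * advertises support for.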
+ */ +#define XEN_HVM_CPUID_EXT_DEST_ID (1u << 5) +/* + * Per-vCPU event channel upcalls work correctly with physical IRQs + * bound to event channels. + */ +#define XEN_HVM_CPUID_UPCALL_VECTOR (1u << 6) /* * Leaf 6 (0x40000x05) diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index 068d9b067c83..62bdceb594f1 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h @@ -23,7 +23,7 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) /* No need for a barrier -- XCHG is a barrier on x86. */ #define xchg_xen_ulong(ptr, val) xchg((ptr), (val)) -extern int xen_have_vector_callback; +extern bool xen_have_vector_callback; /* * Events delivered via platform PCI interrupts are always @@ -34,4 +34,5 @@ static inline bool xen_support_evtchn_rebind(void) return (!xen_hvm_domain() || xen_have_vector_callback); } +extern bool xen_percpu_upcall; #endif /* _ASM_X86_XEN_EVENTS_H */ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index ef05bea7010d..a16d4631547c 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -38,11 +38,13 @@ #include <linux/errno.h> #include <linux/string.h> #include <linux/types.h> +#include <linux/pgtable.h> +#include <linux/instrumentation.h> #include <trace/events/xen.h> +#include <asm/alternative.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/smap.h> #include <asm/nospec-branch.h> @@ -82,15 +84,25 @@ struct xen_dm_op_buf; * - clobber the rest * * The result certainly isn't pretty, and it really shows up cpp's - * weakness as as macro language. Sorry. (But let's just give thanks + * weakness as a macro language. Sorry. (But let's just give thanks * there aren't more than 5 arguments...) 
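 *
 * (Editor's note, not part of the patch.) The change below replaces the
 * fixed-offset call into the shared hypercall page with a static call,
 * and the hypercall number moves from the call target into the rAX
 * register; schematically:
 *
 *	old:	call hypercall_page + __HYPERVISOR_xen_version * 32
 *	new:	mov $__HYPERVISOR_xen_version, %eax
 *		call STATIC_CALL_TRAMP(xen_hypercall)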
*/ -extern struct { char _entry[32]; } hypercall_page[]; +void xen_hypercall_func(void); +DECLARE_STATIC_CALL(xen_hypercall, xen_hypercall_func); -#define __HYPERCALL "call hypercall_page+%c[offset]" -#define __HYPERCALL_ENTRY(x) \ - [offset] "i" (__HYPERVISOR_##x * sizeof(hypercall_page[0])) +#ifdef MODULE +#define __ADDRESSABLE_xen_hypercall +#else +#define __ADDRESSABLE_xen_hypercall \ + __stringify(.global STATIC_CALL_KEY(xen_hypercall);) +#endif + +#define __HYPERCALL \ + __ADDRESSABLE_xen_hypercall \ + __stringify(call STATIC_CALL_TRAMP(xen_hypercall)) + +#define __HYPERCALL_ENTRY(x) "a" (x) #ifdef CONFIG_X86_32 #define __HYPERCALL_RETREG "eax" @@ -148,7 +160,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_0ARG(); \ asm volatile (__HYPERCALL \ : __HYPERCALL_0PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER0); \ (type)__res; \ }) @@ -159,7 +171,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_1ARG(a1); \ asm volatile (__HYPERCALL \ : __HYPERCALL_1PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER1); \ (type)__res; \ }) @@ -170,7 +182,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_2ARG(a1, a2); \ asm volatile (__HYPERCALL \ : __HYPERCALL_2PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER2); \ (type)__res; \ }) @@ -181,7 +193,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_3ARG(a1, a2, a3); \ asm volatile (__HYPERCALL \ : __HYPERCALL_3PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER3); \ (type)__res; \ }) @@ -192,7 +204,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_4ARG(a1, a2, a3, a4); \ asm volatile (__HYPERCALL \ : __HYPERCALL_4PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER4); \ (type)__res; \ }) @@ -206,14 +218,28 @@ xen_single_call(unsigned int call, __HYPERCALL_DECLS; __HYPERCALL_5ARG(a1, a2, a3, a4, a5); - asm volatile(CALL_NOSPEC + asm volatile(__HYPERCALL : __HYPERCALL_5PARAM - : [thunk_target] "a" (&hypercall_page[call]) + : __HYPERCALL_ENTRY(call) : __HYPERCALL_CLOBBER5); return (long)__res; } +static __always_inline void __xen_stac(void) +{ + /* + * Suppress objtool seeing the STAC/CLAC and getting confused about it + * calling random code with AC=1. 
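+ *
+ * (Editor's note.) STAC raises EFLAGS.AC so that, with SMAP enabled,
+ * hypercalls may legitimately touch buffers living in user memory;
+ * every __xen_stac() must be paired with a __xen_clac(), as
+ * privcmd_call() and HYPERVISOR_dm_op() below do.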
+ */ + asm volatile(ASM_STAC_UNSAFE ::: "memory", "flags"); +} + +static __always_inline void __xen_clac(void) +{ + asm volatile(ASM_CLAC_UNSAFE ::: "memory", "flags"); +} + static inline long privcmd_call(unsigned int call, unsigned long a1, unsigned long a2, @@ -222,13 +248,14 @@ privcmd_call(unsigned int call, { long res; - stac(); + __xen_stac(); res = xen_single_call(call, a1, a2, a3, a4, a5); - clac(); + __xen_clac(); return res; } +#ifdef CONFIG_XEN_PV static inline int HYPERVISOR_set_trap_table(struct trap_info *table) { @@ -261,7 +288,108 @@ HYPERVISOR_callback_op(int cmd, void *arg) return _hypercall2(int, callback_op, cmd, arg); } +static __always_inline int +HYPERVISOR_set_debugreg(int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static __always_inline unsigned long +HYPERVISOR_get_debugreg(int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + static inline int +HYPERVISOR_update_descriptor(u64 ma, u64 desc) +{ + return _hypercall2(int, update_descriptor, ma, desc); +} + +static inline int +HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val, + unsigned long flags) +{ + return _hypercall3(int, update_va_mapping, va, new_val.pte, flags); +} + +static inline int +HYPERVISOR_set_segment_base(int reg, unsigned long value) +{ + return _hypercall2(int, set_segment_base, reg, value); +} + +static inline void +MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) +{ + mcl->op = __HYPERVISOR_fpu_taskswitch; + mcl->args[0] = set; + + trace_xen_mc_entry(mcl, 1); +} + +static inline void +MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, + pte_t new_val, unsigned long flags) +{ + mcl->op = __HYPERVISOR_update_va_mapping; + mcl->args[0] = va; + mcl->args[1] = new_val.pte; + mcl->args[2] = flags; + + trace_xen_mc_entry(mcl, 3); +} + +static inline void +MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, + struct desc_struct desc) +{ + mcl->op = __HYPERVISOR_update_descriptor; + mcl->args[0] = maddr; + mcl->args[1] = *(unsigned long *)&desc; + + trace_xen_mc_entry(mcl, 2); +} + +static inline void +MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req, + int count, int *success_count, domid_t domid) +{ + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (unsigned long)req; + mcl->args[1] = count; + mcl->args[2] = (unsigned long)success_count; + mcl->args[3] = domid; + + trace_xen_mc_entry(mcl, 4); +} + +static inline void +MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count, + int *success_count, domid_t domid) +{ + mcl->op = __HYPERVISOR_mmuext_op; + mcl->args[0] = (unsigned long)op; + mcl->args[1] = count; + mcl->args[2] = (unsigned long)success_count; + mcl->args[3] = domid; + + trace_xen_mc_entry(mcl, 4); +} + +static inline void +MULTI_stack_switch(struct multicall_entry *mcl, + unsigned long ss, unsigned long esp) +{ + mcl->op = __HYPERVISOR_stack_switch; + mcl->args[0] = ss; + mcl->args[1] = esp; + + trace_xen_mc_entry(mcl, 2); +} +#endif + +static __always_inline int HYPERVISOR_sched_op(int cmd, void *arg) { return _hypercall2(int, sched_op, cmd, arg); @@ -289,26 +417,6 @@ HYPERVISOR_platform_op(struct xen_platform_op *op) return _hypercall1(int, platform_op, op); } -static inline int -HYPERVISOR_set_debugreg(int reg, unsigned long value) -{ - return _hypercall2(int, set_debugreg, reg, value); -} - -static inline unsigned long -HYPERVISOR_get_debugreg(int reg) -{ - return _hypercall1(unsigned long, get_debugreg, reg); -} - -static 
inline int -HYPERVISOR_update_descriptor(u64 ma, u64 desc) -{ - if (sizeof(u64) == sizeof(long)) - return _hypercall2(int, update_descriptor, ma, desc); - return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); -} - static inline long HYPERVISOR_memory_op(unsigned int cmd, void *arg) { @@ -322,28 +430,12 @@ HYPERVISOR_multicall(void *call_list, uint32_t nr_calls) } static inline int -HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val, - unsigned long flags) -{ - if (sizeof(new_val) == sizeof(long)) - return _hypercall3(int, update_va_mapping, va, - new_val.pte, flags); - else - return _hypercall4(int, update_va_mapping, va, - new_val.pte, new_val.pte >> 32, flags); -} -extern int __must_check xen_event_channel_op_compat(int, void *); - -static inline int HYPERVISOR_event_channel_op(int cmd, void *arg) { - int rc = _hypercall2(int, event_channel_op, cmd, arg); - if (unlikely(rc == -ENOSYS)) - rc = xen_event_channel_op_compat(cmd, arg); - return rc; + return _hypercall2(int, event_channel_op, cmd, arg); } -static inline int +static __always_inline int HYPERVISOR_xen_version(int cmd, void *arg) { return _hypercall2(int, xen_version, cmd, arg); @@ -355,15 +447,10 @@ HYPERVISOR_console_io(int cmd, int count, char *str) return _hypercall3(int, console_io, cmd, count, str); } -extern int __must_check xen_physdev_op_compat(int, void *); - static inline int HYPERVISOR_physdev_op(int cmd, void *arg) { - int rc = _hypercall2(int, physdev_op, cmd, arg); - if (unlikely(rc == -ENOSYS)) - rc = xen_physdev_op_compat(cmd, arg); - return rc; + return _hypercall2(int, physdev_op, cmd, arg); } static inline int @@ -384,14 +471,6 @@ HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args) return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); } -#ifdef CONFIG_X86_64 -static inline int -HYPERVISOR_set_segment_base(int reg, unsigned long value) -{ - return _hypercall2(int, set_segment_base, reg, value); -} -#endif - static inline int HYPERVISOR_suspend(unsigned long start_info_mfn) { @@ -413,13 +492,6 @@ HYPERVISOR_hvm_op(int op, void *arg) } static inline int -HYPERVISOR_tmem_op( - struct tmem_op *op) -{ - return _hypercall1(int, tmem_op, op); -} - -static inline int HYPERVISOR_xenpmu_op(unsigned int op, void *arg) { return _hypercall2(int, xenpmu_op, op, arg); @@ -430,94 +502,10 @@ HYPERVISOR_dm_op( domid_t dom, unsigned int nr_bufs, struct xen_dm_op_buf *bufs) { int ret; - stac(); + __xen_stac(); ret = _hypercall3(int, dm_op, dom, nr_bufs, bufs); - clac(); + __xen_clac(); return ret; } -static inline void -MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) -{ - mcl->op = __HYPERVISOR_fpu_taskswitch; - mcl->args[0] = set; - - trace_xen_mc_entry(mcl, 1); -} - -static inline void -MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, - pte_t new_val, unsigned long flags) -{ - mcl->op = __HYPERVISOR_update_va_mapping; - mcl->args[0] = va; - if (sizeof(new_val) == sizeof(long)) { - mcl->args[1] = new_val.pte; - mcl->args[2] = flags; - } else { - mcl->args[1] = new_val.pte; - mcl->args[2] = new_val.pte >> 32; - mcl->args[3] = flags; - } - - trace_xen_mc_entry(mcl, sizeof(new_val) == sizeof(long) ? 
3 : 4); -} - -static inline void -MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, - struct desc_struct desc) -{ - mcl->op = __HYPERVISOR_update_descriptor; - if (sizeof(maddr) == sizeof(long)) { - mcl->args[0] = maddr; - mcl->args[1] = *(unsigned long *)&desc; - } else { - u32 *p = (u32 *)&desc; - - mcl->args[0] = maddr; - mcl->args[1] = maddr >> 32; - mcl->args[2] = *p++; - mcl->args[3] = *p; - } - - trace_xen_mc_entry(mcl, sizeof(maddr) == sizeof(long) ? 2 : 4); -} - -static inline void -MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req, - int count, int *success_count, domid_t domid) -{ - mcl->op = __HYPERVISOR_mmu_update; - mcl->args[0] = (unsigned long)req; - mcl->args[1] = count; - mcl->args[2] = (unsigned long)success_count; - mcl->args[3] = domid; - - trace_xen_mc_entry(mcl, 4); -} - -static inline void -MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count, - int *success_count, domid_t domid) -{ - mcl->op = __HYPERVISOR_mmuext_op; - mcl->args[0] = (unsigned long)op; - mcl->args[1] = count; - mcl->args[2] = (unsigned long)success_count; - mcl->args[3] = domid; - - trace_xen_mc_entry(mcl, 4); -} - -static inline void -MULTI_stack_switch(struct multicall_entry *mcl, - unsigned long ss, unsigned long esp) -{ - mcl->op = __HYPERVISOR_stack_switch; - mcl->args[0] = ss; - mcl->args[1] = esp; - - trace_xen_mc_entry(mcl, 2); -} - #endif /* _ASM_X86_XEN_HYPERCALL_H */ diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 39171b3646bb..c2fc7869b996 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -36,25 +36,22 @@ extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; +#include <asm/bug.h> #include <asm/processor.h> +#define XEN_SIGNATURE "XenVMMXenVMM" + static inline uint32_t xen_cpuid_base(void) { - return hypervisor_cpuid_base("XenVMMXenVMM", 2); + return cpuid_base_hypervisor(XEN_SIGNATURE, 2); } -#ifdef CONFIG_XEN -extern bool xen_hvm_need_lapic(void); +struct pci_dev; -static inline bool xen_x2apic_para_available(void) -{ - return xen_hvm_need_lapic(); -} +#ifdef CONFIG_XEN_PV_DOM0 +bool xen_initdom_restore_msi(struct pci_dev *dev); #else -static inline bool xen_x2apic_para_available(void) -{ - return (xen_cpuid_base() != 0); -} +static inline bool xen_initdom_restore_msi(struct pci_dev *dev) { return true; } #endif #ifdef CONFIG_HOTPLUG_CPU @@ -62,6 +59,43 @@ void xen_arch_register_cpu(int num); void xen_arch_unregister_cpu(int num); #endif -extern void xen_set_iopl_mask(unsigned mask); +#ifdef CONFIG_PVH +void __init xen_pvh_init(struct boot_params *boot_params); +void __init mem_map_via_hcall(struct boot_params *boot_params_p); +#endif + +/* Lazy mode for batching updates / context switch */ +enum xen_lazy_mode { + XEN_LAZY_NONE, + XEN_LAZY_MMU, + XEN_LAZY_CPU, +}; + +DECLARE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode); + +static inline void enter_lazy(enum xen_lazy_mode mode) +{ + BUG_ON(this_cpu_read(xen_lazy_mode) != XEN_LAZY_NONE); + + this_cpu_write(xen_lazy_mode, mode); +} + +static inline void leave_lazy(enum xen_lazy_mode mode) +{ + BUG_ON(this_cpu_read(xen_lazy_mode) != mode); + + this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE); +} + +enum xen_lazy_mode xen_get_lazy_mode(void); + +#if defined(CONFIG_XEN_DOM0) && defined(CONFIG_ACPI) +void xen_sanitize_proc_cap_bits(uint32_t *buf); +#else +static inline void xen_sanitize_proc_cap_bits(uint32_t *buf) +{ + BUG(); +} +#endif #endif /* 
_ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index 62ca03ef5c65..a078a2b0f032 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -72,7 +72,7 @@ #endif #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* Explicitly size integers that represent pfns in the public interface * with Xen so that on ARM we can have one ABI that works for 32 and 64 * bit guests. */ @@ -137,7 +137,7 @@ DEFINE_GUEST_HANDLE(xen_ulong_t); #define TI_SET_DPL(_ti, _dpl) ((_ti)->flags |= (_dpl)) #define TI_SET_IF(_ti, _if) ((_ti)->flags |= ((!!(_if))<<2)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct trap_info { uint8_t vector; /* exception vector */ uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ @@ -182,8 +182,11 @@ struct arch_shared_info { unsigned long p2m_cr3; /* cr3 value of the p2m address space */ unsigned long p2m_vaddr; /* virtual address of the p2m list */ unsigned long p2m_generation; /* generation count of p2m mapping */ +#ifdef CONFIG_X86_32 + uint32_t wc_sec_hi; +#endif }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #ifdef CONFIG_X86_32 #include <asm/xen/interface_32.h> @@ -193,7 +196,7 @@ struct arch_shared_info { #include <asm/pvclock-abi.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * The following is all CPU context. Note that the fpu_ctxt block is filled * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. @@ -373,18 +376,15 @@ struct xen_pmu_arch { } c; }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * Prefix forces emulation of some non-trapping instructions. * Currently only CPUID. */ -#ifdef __ASSEMBLY__ -#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ; -#define XEN_CPUID XEN_EMULATE_PREFIX cpuid -#else -#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; " -#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" -#endif +#include <asm/emulate_prefix.h> + +#define XEN_EMULATE_PREFIX __ASM_FORM(.byte __XEN_EMULATE_PREFIX ;) +#define XEN_CPUID XEN_EMULATE_PREFIX __ASM_FORM(cpuid) #endif /* _ASM_X86_XEN_INTERFACE_H */ diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h index dc40578abded..74d9768a9cf7 100644 --- a/arch/x86/include/asm/xen/interface_32.h +++ b/arch/x86/include/asm/xen/interface_32.h @@ -44,7 +44,7 @@ */ #define __HYPERVISOR_VIRT_START 0xF5800000 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct cpu_user_regs { uint32_t ebx; @@ -85,7 +85,7 @@ typedef struct xen_callback xen_callback_t; #define XEN_CALLBACK(__cs, __eip) \ ((struct xen_callback){ .cs = (__cs), .eip = (unsigned long)(__eip) }) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h index c599ec269a25..38a19edb81a3 100644 --- a/arch/x86/include/asm/xen/interface_64.h +++ b/arch/x86/include/asm/xen/interface_64.h @@ -61,7 +61,7 @@ * RING1 -> RING3 kernel mode. * RING2 -> RING3 kernel mode. * RING3 -> RING3 user mode. - * However RING0 indicates that the guest kernel should return to iteself + * However RING0 indicates that the guest kernel should return to itself * directly with * orb $3,1*8(%rsp) * iretq @@ -77,7 +77,7 @@ #define VGCF_in_syscall (1<<_VGCF_in_syscall) #define VGCF_IN_SYSCALL VGCF_in_syscall -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct iret_context { /* Top of stack (%rsp at point of hypercall). 
*/ @@ -143,7 +143,7 @@ typedef unsigned long xen_callback_t; #define XEN_CALLBACK(__cs, __rip) \ ((unsigned long)(__rip)) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_XEN_INTERFACE_64_H */ diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h deleted file mode 100644 index 116777e7f387..000000000000 --- a/arch/x86/include/asm/xen/page-coherent.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_XEN_PAGE_COHERENT_H -#define _ASM_X86_XEN_PAGE_COHERENT_H - -#include <asm/page.h> -#include <linux/dma-mapping.h> - -static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - unsigned long attrs) -{ - void *vstart = (void*)__get_free_pages(flags, get_order(size)); - *dma_handle = virt_to_phys(vstart); - return vstart; -} - -static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, - void *cpu_addr, dma_addr_t dma_handle, - unsigned long attrs) -{ - free_pages((unsigned long) cpu_addr, get_order(size)); -} - -static inline void xen_dma_map_page(struct device *hwdev, struct page *page, - dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) { } - -static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs) { } - -static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) { } - -static inline void xen_dma_sync_single_for_device(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) { } - -#endif /* _ASM_X86_XEN_PAGE_COHERENT_H */ diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 790ce08e41f2..59f642a94b9d 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -11,11 +11,10 @@ #include <asm/extable.h> #include <asm/page.h> -#include <asm/pgtable.h> +#include <xen/xen.h> #include <xen/interface/xen.h> #include <xen/interface/grant_table.h> -#include <xen/features.h> /* Xen machine address */ typedef struct xmaddr { @@ -97,11 +96,7 @@ static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val) asm volatile("1: mov %[val], %[ptr]\n" "2:\n" - ".section .fixup, \"ax\"\n" - "3: sub $1, %[ret]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %[ret]) : [ret] "+r" (ret), [ptr] "=m" (*addr) : [val] "r" (val)); @@ -111,16 +106,12 @@ static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val) static inline int xen_safe_read_ulong(const unsigned long *addr, unsigned long *val) { - int ret = 0; unsigned long rval = ~0ul; + int ret = 0; asm volatile("1: mov %[ptr], %[rval]\n" "2:\n" - ".section .fixup, \"ax\"\n" - "3: sub $1, %[ret]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %[ret]) : [ret] "+r" (ret), [rval] "+r" (rval) : [ptr] "m" (*addr)); *val = rval; @@ -171,7 +162,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn) * pfn_to_mfn. This will have to be removed when we figured * out which call. 
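 *
 * (Editor's note, not part of the patch.) The rewritten check below is
 * behavior-preserving: PV domains are never auto-translated while HVM
 * and PVH domains always are, so XENFEAT_auto_translated_physmap is set
 * exactly when !xen_pv_domain() holds.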
*/ - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return pfn; mfn = __pfn_to_mfn(pfn); @@ -184,7 +175,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn) static inline int phys_to_machine_mapping_valid(unsigned long pfn) { - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return 1; return __pfn_to_mfn(pfn) != INVALID_P2M_ENTRY; @@ -219,7 +210,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) * gfn_to_pfn. This will have to be removed when we figure * out which call. */ - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return mfn; pfn = mfn_to_pfn_no_overrides(mfn); @@ -251,7 +242,7 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine) /* Pseudo-physical <-> Guest conversion */ static inline unsigned long pfn_to_gfn(unsigned long pfn) { - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return pfn; else return pfn_to_mfn(pfn); @@ -259,7 +250,7 @@ static inline unsigned long pfn_to_gfn(unsigned long pfn) static inline unsigned long gfn_to_pfn(unsigned long gfn) { - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return gfn; else return mfn_to_pfn(gfn); @@ -293,7 +284,7 @@ static inline unsigned long bfn_to_local_pfn(unsigned long mfn) { unsigned long pfn; - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return mfn; pfn = mfn_to_pfn(mfn); @@ -304,7 +295,10 @@ static inline unsigned long bfn_to_local_pfn(unsigned long mfn) /* VIRT <-> MACHINE conversion */ #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) -#define virt_to_pfn(v) (PFN_DOWN(__pa(v))) +static inline unsigned long virt_to_pfn(const void *v) +{ + return PFN_DOWN(__pa(v)); +} #define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v))) #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) @@ -356,9 +350,6 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr); void make_lowmem_page_readonly(void *vaddr); void make_lowmem_page_readwrite(void *vaddr); -#define xen_remap(cookie, size) ioremap((cookie), (size)); -#define xen_unmap(cookie) iounmap((cookie)) - static inline bool xen_arch_need_swiotlb(struct device *dev, phys_addr_t phys, dma_addr_t dev_addr) @@ -366,9 +357,4 @@ static inline bool xen_arch_need_swiotlb(struct device *dev, return false; } -static inline unsigned long xen_get_swiotlb_free_pages(unsigned int order) -{ - return __get_free_pages(__GFP_NOWARN, order); -} - #endif /* _ASM_X86_XEN_PAGE_H */ diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index 3506d8c598c1..9015b888edd6 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h @@ -14,29 +14,13 @@ static inline int pci_xen_hvm_init(void) return -1; } #endif -#if defined(CONFIG_XEN_DOM0) +#ifdef CONFIG_XEN_PV_DOM0 int __init pci_xen_initial_domain(void); -int xen_find_device_domain_owner(struct pci_dev *dev); -int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); -int xen_unregister_device_domain_owner(struct pci_dev *dev); #else static inline int __init pci_xen_initial_domain(void) { return -1; } -static inline int xen_find_device_domain_owner(struct pci_dev *dev) -{ - return -1; -} -static inline int xen_register_device_domain_owner(struct pci_dev *dev, - uint16_t domain) -{ - return -1; -} -static inline int xen_unregister_device_domain_owner(struct pci_dev *dev) -{ - return -1; -} #endif #if defined(CONFIG_PCI_MSI) diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h 
b/arch/x86/include/asm/xen/swiotlb-xen.h index 6b56d0d45d15..abde0f44df57 100644 --- a/arch/x86/include/asm/xen/swiotlb-xen.h +++ b/arch/x86/include/asm/xen/swiotlb-xen.h @@ -2,16 +2,10 @@ #ifndef _ASM_X86_SWIOTLB_XEN_H #define _ASM_X86_SWIOTLB_XEN_H -#ifdef CONFIG_SWIOTLB_XEN -extern int xen_swiotlb; -extern int __init pci_xen_swiotlb_detect(void); -extern void __init pci_xen_swiotlb_init(void); -extern int pci_xen_swiotlb_init_late(void); -#else -#define xen_swiotlb (0) -static inline int __init pci_xen_swiotlb_detect(void) { return 0; } -static inline void __init pci_xen_swiotlb_init(void) { } -static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; } -#endif +int xen_swiotlb_fixup(void *buf, unsigned long nslabs); +int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, + unsigned int address_bits, + dma_addr_t *dma_handle); +void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); #endif /* _ASM_X86_SWIOTLB_XEN_H */ diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index 45c8605467f1..7b0307acc410 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h @@ -1,17 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_XOR_H #define _ASM_X86_XOR_H /* * Optimized RAID-5 checksumming functions for SSE. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * You should have received a copy of the GNU General Public License - * (for example /usr/src/linux/COPYING); if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ /* @@ -65,7 +57,8 @@ op(i + 3, 3) static void -xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_sse_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 8; @@ -116,7 +109,8 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 8; @@ -150,8 +144,9 @@ xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_sse_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 8; @@ -209,8 +204,9 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 8; @@ -246,8 +242,10 @@ xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_sse_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 8; @@ -312,8 +310,10 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 8; @@ -351,8 +351,11 @@ xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_sse_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { unsigned long lines = bytes >> 8; @@ -424,8 +427,11 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { unsigned long lines = bytes >> 8; diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index 635eac543922..7a6b9474591e 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -1,17 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_XOR_32_H #define _ASM_X86_XOR_32_H /* * Optimized RAID-5 checksumming functions for 
MMX. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * You should have received a copy of the GNU General Public License - * (for example /usr/src/linux/COPYING); if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* @@ -29,7 +21,8 @@ #include <asm/fpu/api.h> static void -xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_pII_mmx_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 7; @@ -72,8 +65,9 @@ xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_pII_mmx_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 7; @@ -121,8 +115,10 @@ xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_pII_mmx_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 7; @@ -176,8 +172,11 @@ xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, static void -xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_pII_mmx_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { unsigned long lines = bytes >> 7; @@ -256,7 +255,8 @@ xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, #undef BLOCK static void -xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_p5_mmx_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 6; @@ -303,8 +303,9 @@ xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_p5_mmx_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 6; @@ -360,8 +361,10 @@ xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_p5_mmx_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 6; @@ -426,8 +429,11 @@ xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_p5_mmx_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict 
p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { unsigned long lines = bytes >> 6; diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index 22a7b1870a31..7f81dd5897f4 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef _ASM_X86_XOR_AVX_H #define _ASM_X86_XOR_AVX_H @@ -8,15 +9,8 @@ * Author: Jim Kukunas <james.t.kukunas@linux.intel.com> * * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. */ -#ifdef CONFIG_AS_AVX - #include <linux/compiler.h> #include <asm/fpu/api.h> @@ -32,7 +26,8 @@ BLOCK4(8) \ BLOCK4(12) -static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1) +static void xor_avx_2(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1) { unsigned long lines = bytes >> 9; @@ -58,8 +53,9 @@ do { \ kernel_fpu_end(); } -static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1, - unsigned long *p2) +static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 9; @@ -88,8 +84,10 @@ do { \ kernel_fpu_end(); } -static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1, - unsigned long *p2, unsigned long *p3) +static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 9; @@ -121,8 +119,11 @@ do { \ kernel_fpu_end(); } -static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1, - unsigned long *p2, unsigned long *p3, unsigned long *p4) +static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 9; @@ -174,11 +175,4 @@ do { \ #define AVX_SELECT(FASTEST) \ (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? 
&xor_block_avx : FASTEST) -#else - -#define AVX_XOR_SPEED {} - -#define AVX_SELECT(FASTEST) (FASTEST) - -#endif #endif diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild index f6648e9928b3..39606a856d3b 100644 --- a/arch/x86/include/uapi/asm/Kbuild +++ b/arch/x86/include/uapi/asm/Kbuild @@ -1,5 +1,4 @@ -include include/uapi/asm-generic/Kbuild.asm - +# SPDX-License-Identifier: GPL-2.0 generated-y += unistd_32.h generated-y += unistd_64.h generated-y += unistd_x32.h diff --git a/arch/x86/include/uapi/asm/amd_hsmp.h b/arch/x86/include/uapi/asm/amd_hsmp.h new file mode 100644 index 000000000000..92d8f256d096 --- /dev/null +++ b/arch/x86/include/uapi/asm/amd_hsmp.h @@ -0,0 +1,477 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_ASM_X86_AMD_HSMP_H_ +#define _UAPI_ASM_X86_AMD_HSMP_H_ + +#include <linux/types.h> + +#pragma pack(4) + +#define HSMP_MAX_MSG_LEN 8 + +/* + * HSMP Messages supported + */ +enum hsmp_message_ids { + HSMP_TEST = 1, /* 01h Increments input value by 1 */ + HSMP_GET_SMU_VER, /* 02h SMU FW version */ + HSMP_GET_PROTO_VER, /* 03h HSMP interface version */ + HSMP_GET_SOCKET_POWER, /* 04h average package power consumption */ + HSMP_SET_SOCKET_POWER_LIMIT, /* 05h Set the socket power limit */ + HSMP_GET_SOCKET_POWER_LIMIT, /* 06h Get current socket power limit */ + HSMP_GET_SOCKET_POWER_LIMIT_MAX,/* 07h Get maximum socket power value */ + HSMP_SET_BOOST_LIMIT, /* 08h Set a core maximum frequency limit */ + HSMP_SET_BOOST_LIMIT_SOCKET, /* 09h Set socket maximum frequency level */ + HSMP_GET_BOOST_LIMIT, /* 0Ah Get current frequency limit */ + HSMP_GET_PROC_HOT, /* 0Bh Get PROCHOT status */ + HSMP_SET_XGMI_LINK_WIDTH, /* 0Ch Set max and min width of xGMI Link */ + HSMP_SET_DF_PSTATE, /* 0Dh Alter APEnable/Disable messages behavior */ + HSMP_SET_AUTO_DF_PSTATE, /* 0Eh Enable DF P-State Performance Boost algorithm */ + HSMP_GET_FCLK_MCLK, /* 0Fh Get FCLK and MEMCLK for current socket */ + HSMP_GET_CCLK_THROTTLE_LIMIT, /* 10h Get CCLK frequency limit in socket */ + HSMP_GET_C0_PERCENT, /* 11h Get average C0 residency in socket */ + HSMP_SET_NBIO_DPM_LEVEL, /* 12h Set max/min LCLK DPM Level for a given NBIO */ + HSMP_GET_NBIO_DPM_LEVEL, /* 13h Get LCLK DPM level min and max for a given NBIO */ + HSMP_GET_DDR_BANDWIDTH, /* 14h Get theoretical maximum and current DDR Bandwidth */ + HSMP_GET_TEMP_MONITOR, /* 15h Get socket temperature */ + HSMP_GET_DIMM_TEMP_RANGE, /* 16h Get per-DIMM temperature range and refresh rate */ + HSMP_GET_DIMM_POWER, /* 17h Get per-DIMM power consumption */ + HSMP_GET_DIMM_THERMAL, /* 18h Get per-DIMM thermal sensors */ + HSMP_GET_SOCKET_FREQ_LIMIT, /* 19h Get current active frequency per socket */ + HSMP_GET_CCLK_CORE_LIMIT, /* 1Ah Get CCLK frequency limit per core */ + HSMP_GET_RAILS_SVI, /* 1Bh Get SVI-based Telemetry for all rails */ + HSMP_GET_SOCKET_FMAX_FMIN, /* 1Ch Get Fmax and Fmin per socket */ + HSMP_GET_IOLINK_BANDWITH, /* 1Dh Get current bandwidth on IO Link */ + HSMP_GET_XGMI_BANDWITH, /* 1Eh Get current bandwidth on xGMI Link */ + HSMP_SET_GMI3_WIDTH, /* 1Fh Set max and min GMI3 Link width */ + HSMP_SET_PCI_RATE, /* 20h Control link rate on PCIe devices */ + HSMP_SET_POWER_MODE, /* 21h Select power efficiency profile policy */ + HSMP_SET_PSTATE_MAX_MIN, /* 22h Set the max and min DF P-State */ + HSMP_GET_METRIC_TABLE_VER, /* 23h Get metrics table version */ + HSMP_GET_METRIC_TABLE, /* 24h Get metrics table */ + HSMP_GET_METRIC_TABLE_DRAM_ADDR,/* 25h Get metrics table dram address */ 
+ HSMP_SET_XGMI_PSTATE_RANGE, /* 26h Set xGMI P-state range */ + HSMP_CPU_RAIL_ISO_FREQ_POLICY, /* 27h Get/Set Cpu Iso frequency policy */ + HSMP_DFC_ENABLE_CTRL, /* 28h Enable/Disable DF C-state */ + HSMP_GET_RAPL_UNITS = 0x30, /* 30h Get scaling factor for energy */ + HSMP_GET_RAPL_CORE_COUNTER, /* 31h Get core energy counter value */ + HSMP_GET_RAPL_PACKAGE_COUNTER, /* 32h Get package energy counter value */ + HSMP_MSG_ID_MAX, +}; + +struct hsmp_message { + __u32 msg_id; /* Message ID */ + __u16 num_args; /* Number of input argument words in message */ + __u16 response_sz; /* Number of expected output/response words */ + __u32 args[HSMP_MAX_MSG_LEN]; /* argument/response buffer */ + __u16 sock_ind; /* socket number */ +}; + +enum hsmp_msg_type { + HSMP_RSVD = -1, + HSMP_SET = 0, + HSMP_GET = 1, + HSMP_SET_GET = 2, +}; + +enum hsmp_proto_versions { + HSMP_PROTO_VER2 = 2, + HSMP_PROTO_VER3, + HSMP_PROTO_VER4, + HSMP_PROTO_VER5, + HSMP_PROTO_VER6, + HSMP_PROTO_VER7 +}; + +struct hsmp_msg_desc { + int num_args; + int response_sz; + enum hsmp_msg_type type; +}; + +/* + * These comments are for reference; the supported list of messages and + * the message definitions are in the HSMP chapter of the respective + * family/model PPR. + * + * Unsupported messages return -ENOMSG. + */ +static const struct hsmp_msg_desc hsmp_msg_desc_table[] + __attribute__((unused)) = { + /* RESERVED */ + {0, 0, HSMP_RSVD}, + + /* + * HSMP_TEST, num_args = 1, response_sz = 1 + * input: args[0] = xx + * output: args[0] = xx + 1 + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_SMU_VER, num_args = 0, response_sz = 1 + * output: args[0] = smu fw ver + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_PROTO_VER, num_args = 0, response_sz = 1 + * output: args[0] = proto version + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_POWER, num_args = 0, response_sz = 1 + * output: args[0] = socket power in mWatts + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_SOCKET_POWER_LIMIT, num_args = 1, response_sz = 0 + * input: args[0] = power limit value in mWatts + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_GET_SOCKET_POWER_LIMIT, num_args = 0, response_sz = 1 + * output: args[0] = socket power limit value in mWatts + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_POWER_LIMIT_MAX, num_args = 0, response_sz = 1 + * output: args[0] = maximum socket power limit in mWatts + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_BOOST_LIMIT, num_args = 1, response_sz = 0 + * input: args[0] = apic id[31:16] + boost limit value in MHz[15:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_SET_BOOST_LIMIT_SOCKET, num_args = 1, response_sz = 0 + * input: args[0] = boost limit value in MHz + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_GET_BOOST_LIMIT, num_args = 1, response_sz = 1 + * input: args[0] = apic id + * output: args[0] = boost limit value in MHz + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_PROC_HOT, num_args = 0, response_sz = 1 + * output: args[0] = proc hot status + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_XGMI_LINK_WIDTH, num_args = 1, response_sz = 0 + * input: args[0] = min link width[15:8] + max link width[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_SET_DF_PSTATE, num_args = 1, response_sz = 0 + * input: args[0] = df pstate[7:0] + */ + {1, 0, HSMP_SET}, + + /* HSMP_SET_AUTO_DF_PSTATE, num_args = 0, response_sz = 0 */ + {0, 0, HSMP_SET}, + + /* + * HSMP_GET_FCLK_MCLK, num_args = 0, response_sz = 2 + * output: args[0] = fclk in MHz, args[1] = mclk in MHz + */ + {0, 2, HSMP_GET}, + + /* + * HSMP_GET_CCLK_THROTTLE_LIMIT, num_args = 0, response_sz =
1 + * output: args[0] = core clock in MHz + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_C0_PERCENT, num_args = 0, response_sz = 1 + * output: args[0] = average c0 residency + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 0 + * input: args[0] = nbioid[23:16] + max dpm level[15:8] + min dpm level[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_GET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 1 + * input: args[0] = nbioid[23:16] + * output: args[0] = max dpm level[15:8] + min dpm level[7:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_DDR_BANDWIDTH, num_args = 0, response_sz = 1 + * output: args[0] = max bw in Gbps[31:20] + utilised bw in Gbps[19:8] + + * bw in percentage[7:0] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_TEMP_MONITOR, num_args = 0, response_sz = 1 + * output: args[0] = temperature in degree celsius. [15:8] integer part + + * [7:5] fractional part + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_DIMM_TEMP_RANGE, num_args = 1, response_sz = 1 + * input: args[0] = DIMM address[7:0] + * output: args[0] = refresh rate[3] + temperature range[2:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_DIMM_POWER, num_args = 1, response_sz = 1 + * input: args[0] = DIMM address[7:0] + * output: args[0] = DIMM power in mW[31:17] + update rate in ms[16:8] + + * DIMM address[7:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_DIMM_THERMAL, num_args = 1, response_sz = 1 + * input: args[0] = DIMM address[7:0] + * output: args[0] = temperature in degree celsius[31:21] + update rate in ms[16:8] + + * DIMM address[7:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_FREQ_LIMIT, num_args = 0, response_sz = 1 + * output: args[0] = frequency in MHz[31:16] + frequency source[15:0] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_CCLK_CORE_LIMIT, num_args = 1, response_sz = 1 + * input: args[0] = apic id [31:0] + * output: args[0] = frequency in MHz[31:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_RAILS_SVI, num_args = 0, response_sz = 1 + * output: args[0] = power in mW[31:0] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_FMAX_FMIN, num_args = 0, response_sz = 1 + * output: args[0] = fmax in MHz[31:16] + fmin in MHz[15:0] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_IOLINK_BANDWITH, num_args = 1, response_sz = 1 + * input: args[0] = link id[15:8] + bw type[2:0] + * output: args[0] = io bandwidth in Mbps[31:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_XGMI_BANDWITH, num_args = 1, response_sz = 1 + * input: args[0] = link id[15:8] + bw type[2:0] + * output: args[0] = xgmi bandwidth in Mbps[31:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_SET_GMI3_WIDTH, num_args = 1, response_sz = 0 + * input: args[0] = min link width[15:8] + max link width[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_SET_PCI_RATE, num_args = 1, response_sz = 1 + * input: args[0] = link rate control value + * output: args[0] = previous link rate control value + */ + {1, 1, HSMP_SET}, + + /* + * HSMP_SET_POWER_MODE, num_args = 1, response_sz = 0 + * input: args[0] = power efficiency mode[2:0] + */ + {1, 1, HSMP_SET_GET}, + + /* + * HSMP_SET_PSTATE_MAX_MIN, num_args = 1, response_sz = 0 + * input: args[0] = min df pstate[15:8] + max df pstate[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_GET_METRIC_TABLE_VER, num_args = 0, response_sz = 1 + * output: args[0] = metrics table version + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_METRIC_TABLE, num_args = 0, response_sz = 0 + */ + {0, 0, HSMP_GET}, + + /* + * HSMP_GET_METRIC_TABLE_DRAM_ADDR, num_args = 0, response_sz = 2 + * output: args[0] = lower 32 bits of 
the address + * output: args[1] = upper 32 bits of the address + */ + {0, 2, HSMP_GET}, + + /* + * HSMP_SET_XGMI_PSTATE_RANGE, num_args = 1, response_sz = 0 + * input: args[0] = min xGMI p-state[15:8] + max xGMI p-state[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_CPU_RAIL_ISO_FREQ_POLICY, num_args = 1, response_sz = 1 + * input: args[0] = set/get policy[31] + + * disable/enable independent control[0] + * output: args[0] = current policy[0] + */ + {1, 1, HSMP_SET_GET}, + + /* + * HSMP_DFC_ENABLE_CTRL, num_args = 1, response_sz = 1 + * input: args[0] = set/get policy[31] + enable/disable DFC[0] + * output: args[0] = current policy[0] + */ + {1, 1, HSMP_SET_GET}, + + /* RESERVED(0x29-0x2f) */ + {0, 0, HSMP_RSVD}, + {0, 0, HSMP_RSVD}, + {0, 0, HSMP_RSVD}, + {0, 0, HSMP_RSVD}, + {0, 0, HSMP_RSVD}, + {0, 0, HSMP_RSVD}, + {0, 0, HSMP_RSVD}, + + /* + * HSMP_GET_RAPL_UNITS, response_sz = 1 + * output: args[0] = tu value[19:16] + esu value[12:8] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_RAPL_CORE_COUNTER, num_args = 1, response_sz = 1 + * input: args[0] = apic id[15:0] + * output: args[0] = lower 32 bits of energy + * output: args[1] = upper 32 bits of energy + */ + {1, 2, HSMP_GET}, + + /* + * HSMP_GET_RAPL_PACKAGE_COUNTER, num_args = 0, response_sz = 1 + * output: args[0] = lower 32 bits of energy + * output: args[1] = upper 32 bits of energy + */ + {0, 2, HSMP_GET}, + +}; + +/* Metrics table (supported only with proto version 6) */ +struct hsmp_metric_table { + __u32 accumulation_counter; + + /* TEMPERATURE */ + __u32 max_socket_temperature; + __u32 max_vr_temperature; + __u32 max_hbm_temperature; + __u64 max_socket_temperature_acc; + __u64 max_vr_temperature_acc; + __u64 max_hbm_temperature_acc; + + /* POWER */ + __u32 socket_power_limit; + __u32 max_socket_power_limit; + __u32 socket_power; + + /* ENERGY */ + __u64 timestamp; + __u64 socket_energy_acc; + __u64 ccd_energy_acc; + __u64 xcd_energy_acc; + __u64 aid_energy_acc; + __u64 hbm_energy_acc; + + /* FREQUENCY */ + __u32 cclk_frequency_limit; + __u32 gfxclk_frequency_limit; + __u32 fclk_frequency; + __u32 uclk_frequency; + __u32 socclk_frequency[4]; + __u32 vclk_frequency[4]; + __u32 dclk_frequency[4]; + __u32 lclk_frequency[4]; + __u64 gfxclk_frequency_acc[8]; + __u64 cclk_frequency_acc[96]; + + /* FREQUENCY RANGE */ + __u32 max_cclk_frequency; + __u32 min_cclk_frequency; + __u32 max_gfxclk_frequency; + __u32 min_gfxclk_frequency; + __u32 fclk_frequency_table[4]; + __u32 uclk_frequency_table[4]; + __u32 socclk_frequency_table[4]; + __u32 vclk_frequency_table[4]; + __u32 dclk_frequency_table[4]; + __u32 lclk_frequency_table[4]; + __u32 max_lclk_dpm_range; + __u32 min_lclk_dpm_range; + + /* XGMI */ + __u32 xgmi_width; + __u32 xgmi_bitrate; + __u64 xgmi_read_bandwidth_acc[8]; + __u64 xgmi_write_bandwidth_acc[8]; + + /* ACTIVITY */ + __u32 socket_c0_residency; + __u32 socket_gfx_busy; + __u32 dram_bandwidth_utilization; + __u64 socket_c0_residency_acc; + __u64 socket_gfx_busy_acc; + __u64 dram_bandwidth_acc; + __u32 max_dram_bandwidth; + __u64 dram_bandwidth_utilization_acc; + __u64 pcie_bandwidth_acc[4]; + + /* THROTTLERS */ + __u32 prochot_residency_acc; + __u32 ppt_residency_acc; + __u32 socket_thm_residency_acc; + __u32 vr_thm_residency_acc; + __u32 hbm_thm_residency_acc; + __u32 spare; + + /* New items at the end to maintain driver compatibility */ + __u32 gfxclk_frequency[8]; +}; + +/* Reset to default packing */ +#pragma pack() + +/* Define unique ioctl command for hsmp msgs using generic _IOWR */ +#define HSMP_BASE_IOCTL_NR 0xF8 
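
To make the message format concrete, here is a minimal userspace sketch that round-trips HSMP_TEST through the ioctl defined from this base number just below. The /dev/hsmp device node is an assumption about how the platform driver exposes the interface, and error handling is trimmed for brevity.

/* Hedged usage sketch: HSMP_TEST returns args[0] + 1 for socket 0. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <asm/amd_hsmp.h>

int main(void)
{
	struct hsmp_message msg;
	int fd = open("/dev/hsmp", O_RDWR);	/* assumed device node */

	if (fd < 0)
		return 1;

	memset(&msg, 0, sizeof(msg));
	msg.msg_id = HSMP_TEST;		/* 01h: SMU increments the input */
	msg.num_args = 1;
	msg.response_sz = 1;
	msg.args[0] = 41;
	msg.sock_ind = 0;		/* socket 0 */

	if (ioctl(fd, HSMP_IOCTL_CMD, &msg) == 0)
		printf("HSMP_TEST: 41 -> %u\n", msg.args[0]);

	close(fd);
	return 0;
}
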
+#define HSMP_IOCTL_CMD _IOWR(HSMP_BASE_IOCTL_NR, 0, struct hsmp_message) + +#endif /*_ASM_X86_AMD_HSMP_H_*/ diff --git a/arch/x86/include/uapi/asm/auxvec.h b/arch/x86/include/uapi/asm/auxvec.h index 580e3c567046..6beb55bbefa4 100644 --- a/arch/x86/include/uapi/asm/auxvec.h +++ b/arch/x86/include/uapi/asm/auxvec.h @@ -12,9 +12,9 @@ /* entries in ARCH_DLINFO: */ #if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64) -# define AT_VECTOR_SIZE_ARCH 2 +# define AT_VECTOR_SIZE_ARCH 3 #else /* else it's non-compat x86-64 */ -# define AT_VECTOR_SIZE_ARCH 1 +# define AT_VECTOR_SIZE_ARCH 2 #endif #endif /* _ASM_X86_AUXVEC_H */ diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 60733f137e9a..dafbf581c515 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -2,14 +2,7 @@ #ifndef _ASM_X86_BOOTPARAM_H #define _ASM_X86_BOOTPARAM_H -/* setup_data types */ -#define SETUP_NONE 0 -#define SETUP_E820_EXT 1 -#define SETUP_DTB 2 -#define SETUP_PCI 3 -#define SETUP_EFI 4 -#define SETUP_APPLE_PROPERTIES 5 -#define SETUP_JAILHOUSE 6 +#include <asm/setup_data.h> /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF @@ -29,8 +22,11 @@ #define XLF_EFI_HANDOVER_32 (1<<2) #define XLF_EFI_HANDOVER_64 (1<<3) #define XLF_EFI_KEXEC (1<<4) +#define XLF_5LEVEL (1<<5) +#define XLF_5LEVEL_ENABLED (1<<6) +#define XLF_MEM_ENCRYPTION (1<<7) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/screen_info.h> @@ -39,14 +35,6 @@ #include <asm/ist.h> #include <video/edid.h> -/* extensible setup data list node */ -struct setup_data { - __u64 next; - __u32 type; - __u32 len; - __u8 data[0]; -}; - struct setup_header { __u8 setup_sects; __u16 root_flags; @@ -86,6 +74,7 @@ struct setup_header { __u64 pref_address; __u32 init_size; __u32 handover_offset; + __u32 kernel_info_offset; } __attribute__((packed)); struct sys_desc_table { @@ -119,35 +108,10 @@ struct efi_info { #define E820_MAX_ENTRIES_ZEROPAGE 128 /* - * The E820 memory region entry of the boot protocol ABI: - */ -struct boot_e820_entry { - __u64 addr; - __u64 size; - __u32 type; -} __attribute__((packed)); - -/* * Smallest compatible version of jailhouse_setup_data required by this kernel. */ #define JAILHOUSE_SETUP_REQUIRED_VERSION 1 -/* - * The boot loader is passing platform information via this Jailhouse-specific - * setup data structure. - */ -struct jailhouse_setup_data { - __u16 version; - __u16 compatible_version; - __u16 pm_timer_address; - __u16 num_cpus; - __u64 pci_mmconfig_base; - __u32 tsc_khz; - __u32 apic_khz; - __u8 standard_ioapic; - __u8 cpu_ids[255]; -} __attribute__((packed)); - /* The so-called "zeropage" */ struct boot_params { struct screen_info screen_info; /* 0x000 */ @@ -164,7 +128,8 @@ struct boot_params { __u32 ext_ramdisk_image; /* 0x0c0 */ __u32 ext_ramdisk_size; /* 0x0c4 */ __u32 ext_cmd_line_ptr; /* 0x0c8 */ - __u8 _pad4[116]; /* 0x0cc */ + __u8 _pad4[112]; /* 0x0cc */ + __u32 cc_blob_address; /* 0x13c */ struct edid_info edid_info; /* 0x140 */ struct efi_info efi_info; /* 0x1c0 */ __u32 alt_mem_k; /* 0x1e0 */ @@ -211,7 +176,7 @@ struct boot_params { * handling of page tables. * * These enums should only ever be used by x86 code, and the code that uses - * it should be well contained and compartamentalized. + * it should be well contained and compartmentalized. * * KVM and Xen HVM do not have a subarch as these are expected to follow * standard x86 boot entries. 
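
On the auxvec.h change above: bumping AT_VECTOR_SIZE_ARCH reserves room for one more x86-specific ARCH_DLINFO entry in the auxiliary vector handed to each new process. A small sketch of how such entries surface in userspace, using only the stock glibc accessor (nothing x86-new is assumed):

#include <stdio.h>
#include <sys/auxv.h>

int main(void)
{
	/* The vDSO base (AT_SYSINFO_EHDR) is one of the arch-specific
	 * auxv entries counted by AT_VECTOR_SIZE_ARCH. */
	unsigned long vdso = getauxval(AT_SYSINFO_EHDR);

	printf("vDSO mapped at %#lx\n", vdso);
	return 0;
}
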
If there is a genuine need for "hypervisor" type @@ -229,10 +194,10 @@ struct boot_params { * @X86_SUBARCH_XEN: Used for Xen guest types which follow the PV boot path, * which start at asm startup_xen() entry point and later jump to the C * xen_start_kernel() entry point. Both domU and dom0 type of guests are - * currently supportd through this PV boot path. + * currently supported through this PV boot path. * @X86_SUBARCH_INTEL_MID: Used for Intel MID (Mobile Internet Device) platform * systems which do not have the PCI legacy interfaces. - * @X86_SUBARCH_CE4100: Used for Intel CE media processor (CE4100) SoC for + * @X86_SUBARCH_CE4100: Used for Intel CE media processor (CE4100) SoC * for settop boxes and media devices, the use of a subarch for CE4100 * is more of a hack... */ @@ -245,6 +210,6 @@ enum x86_hardware_subarch { X86_NR_SUBARCHS, }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_BOOTPARAM_H */ diff --git a/arch/x86/include/uapi/asm/byteorder.h b/arch/x86/include/uapi/asm/byteorder.h index 484e3cfd7ef2..149143cab9ff 100644 --- a/arch/x86/include/uapi/asm/byteorder.h +++ b/arch/x86/include/uapi/asm/byteorder.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _ASM_X86_BYTEORDER_H #define _ASM_X86_BYTEORDER_H diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h index d95d080b30e3..41da492dfb01 100644 --- a/arch/x86/include/uapi/asm/debugreg.h +++ b/arch/x86/include/uapi/asm/debugreg.h @@ -15,7 +15,26 @@ which debugging register was responsible for the trap. The other bits are either reserved or not of interest to us. */ -/* Define reserved bits in DR6 which are always set to 1 */ +/* + * Define bits in DR6 which are set to 1 by default. + * + * This is also the DR6 architectural value following Power-up, Reset or INIT. + * + * Note, with the introduction of Bus Lock Detection (BLD) and Restricted + * Transactional Memory (RTM), the DR6 register has been modified: + * + * 1) BLD flag (bit 11) is no longer reserved to 1 if the CPU supports + * Bus Lock Detection. The assertion of a bus lock could clear it. + * + * 2) RTM flag (bit 16) is no longer reserved to 1 if the CPU supports + * restricted transactional memory. #DB occurred inside an RTM region + * could clear it. + * + * Apparently, DR6.BLD and DR6.RTM are active low bits. + * + * As a result, DR6_RESERVED is an incorrect name now, but it is kept for + * compatibility. 
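
Because DR6.BLD and DR6.RTM are active low while the trap bits are active high, a #DB handler cannot test the register uniformly until it normalizes the polarity. A sketch of that idiom follows; the constants mirror the DR6_RESERVED and DR_BUS_LOCK values defined just below, and the helpers are illustrative, not the kernel's exact code.

/* Sketch: make every "event occurred" bit of DR6 read as 1. */
static unsigned long dr6_normalize(unsigned long dr6)
{
	/* XOR with the reset value flips the active-low bits (BLD, RTM)
	 * and clears the reserved bits that always read as 1. */
	return dr6 ^ 0xFFFF0FF0UL;			/* DR6_RESERVED */
}

static int dr6_is_bus_lock(unsigned long dr6)
{
	return (dr6_normalize(dr6) & 0x800) != 0;	/* DR_BUS_LOCK */
}
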
+ */ #define DR6_RESERVED (0xFFFF0FF0) #define DR_TRAP0 (0x1) /* db0 */ @@ -24,6 +43,7 @@ #define DR_TRAP3 (0x8) /* db3 */ #define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3) +#define DR_BUS_LOCK (0x800) /* bus_lock */ #define DR_STEP (0x4000) /* single-step */ #define DR_SWITCH (0x8000) /* task switch */ diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h index 2f491efe3a12..55bc66867156 100644 --- a/arch/x86/include/uapi/asm/e820.h +++ b/arch/x86/include/uapi/asm/e820.h @@ -54,7 +54,7 @@ */ #define E820_RESERVED_KERN 128 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> struct e820entry { __u64 addr; /* start of memory segment */ @@ -76,7 +76,7 @@ struct e820map { #define BIOS_ROM_BASE 0xffe00000 #define BIOS_ROM_END 0xffffffff -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_ASM_X86_E820_H */ diff --git a/arch/x86/include/uapi/asm/elf.h b/arch/x86/include/uapi/asm/elf.h new file mode 100644 index 000000000000..468e135fa285 --- /dev/null +++ b/arch/x86/include/uapi/asm/elf.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ASM_X86_ELF_H +#define _UAPI_ASM_X86_ELF_H + +#include <linux/types.h> + +struct x86_xfeat_component { + __u32 type; + __u32 size; + __u32 offset; + __u32 flags; +} __packed; + +_Static_assert(sizeof(struct x86_xfeat_component) % 4 == 0, "x86_xfeat_component is not aligned"); + +#endif /* _UAPI_ASM_X86_ELF_H */ diff --git a/arch/x86/include/uapi/asm/errno.h b/arch/x86/include/uapi/asm/errno.h deleted file mode 100644 index 4c82b503d92f..000000000000 --- a/arch/x86/include/uapi/asm/errno.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/errno.h> diff --git a/arch/x86/include/uapi/asm/fcntl.h b/arch/x86/include/uapi/asm/fcntl.h deleted file mode 100644 index 46ab12db5739..000000000000 --- a/arch/x86/include/uapi/asm/fcntl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/fcntl.h> diff --git a/arch/x86/include/uapi/asm/hwcap2.h b/arch/x86/include/uapi/asm/hwcap2.h index 6ebaae90e207..054604aba9f0 100644 --- a/arch/x86/include/uapi/asm/hwcap2.h +++ b/arch/x86/include/uapi/asm/hwcap2.h @@ -1,8 +1,13 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _ASM_X86_HWCAP2_H #define _ASM_X86_HWCAP2_H +#include <linux/const.h> + /* MONITOR/MWAIT enabled in Ring 3 */ -#define HWCAP2_RING3MWAIT (1 << 0) +#define HWCAP2_RING3MWAIT _BITUL(0) + +/* Kernel allows FSGSBASE instructions available in Ring 3 */ +#define HWCAP2_FSGSBASE _BITUL(1) #endif diff --git a/arch/x86/include/uapi/asm/ioctl.h b/arch/x86/include/uapi/asm/ioctl.h deleted file mode 100644 index b279fe06dfe5..000000000000 --- a/arch/x86/include/uapi/asm/ioctl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ioctl.h> diff --git a/arch/x86/include/uapi/asm/ioctls.h b/arch/x86/include/uapi/asm/ioctls.h deleted file mode 100644 index ec34c760665e..000000000000 --- a/arch/x86/include/uapi/asm/ioctls.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ioctls.h> diff --git a/arch/x86/include/uapi/asm/ipcbuf.h b/arch/x86/include/uapi/asm/ipcbuf.h deleted file mode 100644 index 84c7e51cb6d0..000000000000 --- a/arch/x86/include/uapi/asm/ipcbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ipcbuf.h> diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index dabfcf7c3941..7ceff6583652 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -7,11 +7,15 @@ * */ +#include 
<linux/const.h> +#include <linux/bits.h> #include <linux/types.h> #include <linux/ioctl.h> +#include <linux/stddef.h> #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define KVM_DIRTY_LOG_PAGE_OFFSET 64 #define DE_VECTOR 0 #define DB_VECTOR 1 @@ -31,6 +35,11 @@ #define MC_VECTOR 18 #define XM_VECTOR 19 #define VE_VECTOR 20 +#define CP_VECTOR 21 + +#define HV_VECTOR 28 +#define VC_VECTOR 29 +#define SX_VECTOR 30 /* Select x86 specific features in <linux/kvm.h> */ #define __KVM_HAVE_PIT @@ -38,7 +47,6 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_MSI #define __KVM_HAVE_USER_NMI -#define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_MSIX #define __KVM_HAVE_MCE #define __KVM_HAVE_PIT_STATE2 @@ -47,19 +55,10 @@ #define __KVM_HAVE_DEBUGREGS #define __KVM_HAVE_XSAVE #define __KVM_HAVE_XCRS -#define __KVM_HAVE_READONLY_MEM /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 -struct kvm_memory_alias { - __u32 slot; /* this has a different namespace than memory slots */ - __u32 flags; - __u64 guest_phys_addr; - __u64 memory_size; - __u64 target_phys_addr; -}; - /* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */ struct kvm_pic_state { __u8 last_irr; /* edge detection */ @@ -111,6 +110,8 @@ struct kvm_ioapic_state { #define KVM_NR_IRQCHIPS 3 #define KVM_RUN_X86_SMM (1 << 0) +#define KVM_RUN_X86_BUS_LOCK (1 << 1) +#define KVM_RUN_X86_GUEST_MODE (1 << 2) /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { @@ -157,6 +158,19 @@ struct kvm_sregs { __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; }; +struct kvm_sregs2 { + /* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */ + struct kvm_segment cs, ds, es, fs, gs, ss; + struct kvm_segment tr, ldt; + struct kvm_dtable gdt, idt; + __u64 cr0, cr2, cr3, cr4, cr8; + __u64 efer; + __u64 apic_base; + __u64 flags; + __u64 pdptrs[4]; +}; +#define KVM_SREGS2_FLAGS_PDPTRS_VALID 1 + /* for KVM_GET_FPU and KVM_SET_FPU */ struct kvm_fpu { __u8 fpr[8][16]; @@ -183,15 +197,40 @@ struct kvm_msrs { __u32 nmsrs; /* number of msrs in entries */ __u32 pad; - struct kvm_msr_entry entries[0]; + struct kvm_msr_entry entries[]; }; /* for KVM_GET_MSR_INDEX_LIST */ struct kvm_msr_list { __u32 nmsrs; /* number of msrs in entries */ - __u32 indices[0]; + __u32 indices[]; +}; + +/* Maximum size of any access bitmap in bytes */ +#define KVM_MSR_FILTER_MAX_BITMAP_SIZE 0x600 + +/* for KVM_X86_SET_MSR_FILTER */ +struct kvm_msr_filter_range { +#define KVM_MSR_FILTER_READ (1 << 0) +#define KVM_MSR_FILTER_WRITE (1 << 1) +#define KVM_MSR_FILTER_RANGE_VALID_MASK (KVM_MSR_FILTER_READ | \ + KVM_MSR_FILTER_WRITE) + __u32 flags; + __u32 nmsrs; /* number of msrs in bitmap */ + __u32 base; /* MSR index the bitmap starts at */ + __u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */ }; +#define KVM_MSR_FILTER_MAX_RANGES 16 +struct kvm_msr_filter { +#ifndef __KERNEL__ +#define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0) +#endif +#define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0) +#define KVM_MSR_FILTER_VALID_MASK (KVM_MSR_FILTER_DEFAULT_DENY) + __u32 flags; + struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES]; +}; struct kvm_cpuid_entry { __u32 function; @@ -206,7 +245,7 @@ struct kvm_cpuid_entry { struct kvm_cpuid { __u32 nent; __u32 padding; - struct kvm_cpuid_entry entries[0]; + struct kvm_cpuid_entry entries[]; }; struct kvm_cpuid_entry2 { @@ -228,7 +267,7 @@ struct kvm_cpuid_entry2 { struct kvm_cpuid2 { __u32 nent; __u32 padding; - struct kvm_cpuid_entry2 entries[0]; + struct kvm_cpuid_entry2 entries[]; }; /* for KVM_GET_PIT and 
KVM_SET_PIT */ @@ -260,6 +299,7 @@ struct kvm_debug_exit_arch { #define KVM_GUESTDBG_USE_HW_BP 0x00020000 #define KVM_GUESTDBG_INJECT_DB 0x00040000 #define KVM_GUESTDBG_INJECT_BP 0x00080000 +#define KVM_GUESTDBG_BLOCKIRQ 0x00100000 /* for KVM_SET_GUEST_DEBUG */ struct kvm_guest_debug_arch { @@ -270,7 +310,8 @@ struct kvm_pit_state { struct kvm_pit_channel_state channels[3]; }; -#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001 +#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001 +#define KVM_PIT_FLAGS_SPEAKER_DATA_ON 0x00000002 struct kvm_pit_state2 { struct kvm_pit_channel_state channels[3]; @@ -289,6 +330,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 #define KVM_VCPUEVENT_VALID_SMM 0x00000008 #define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 +#define KVM_VCPUEVENT_VALID_TRIPLE_FAULT 0x00000020 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -323,7 +365,10 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u8 reserved[27]; + struct { + __u8 pending; + } triple_fault; + __u8 reserved[26]; __u8 exception_has_payload; __u64 exception_payload; }; @@ -337,9 +382,23 @@ struct kvm_debugregs { __u64 reserved[9]; }; -/* for KVM_CAP_XSAVE */ +/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */ struct kvm_xsave { + /* + * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes + * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) + * respectively, when invoked on the vm file descriptor. + * + * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) + * will always be at least 4096. Currently, it is only greater + * than 4096 if a dynamic feature has been enabled with + * ``arch_prctl()``, but this may change in the future. + * + * The offsets of the state save areas in struct kvm_xsave follow + * the contents of CPUID leaf 0xD on the host. + */ __u32 region[1024]; + __u32 extra[]; }; #define KVM_MAX_XCRS 16 @@ -357,6 +416,35 @@ struct kvm_xcrs { __u64 padding[16]; }; +#define KVM_X86_REG_TYPE_MSR 2 +#define KVM_X86_REG_TYPE_KVM 3 + +#define KVM_X86_KVM_REG_SIZE(reg) \ +({ \ + reg == KVM_REG_GUEST_SSP ? KVM_REG_SIZE_U64 : 0; \ +}) + +#define KVM_X86_REG_TYPE_SIZE(type, reg) \ +({ \ + __u64 type_size = (__u64)type << 32; \ + \ + type_size |= type == KVM_X86_REG_TYPE_MSR ? KVM_REG_SIZE_U64 : \ + type == KVM_X86_REG_TYPE_KVM ? 
KVM_X86_KVM_REG_SIZE(reg) : \ + 0; \ + type_size; \ +}) + +#define KVM_X86_REG_ID(type, index) \ + (KVM_REG_X86 | KVM_X86_REG_TYPE_SIZE(type, index) | index) + +#define KVM_X86_REG_MSR(index) \ + KVM_X86_REG_ID(KVM_X86_REG_TYPE_MSR, index) +#define KVM_X86_REG_KVM(index) \ + KVM_X86_REG_ID(KVM_X86_REG_TYPE_KVM, index) + +/* KVM-defined registers starting from 0 */ +#define KVM_REG_GUEST_SSP 0 + #define KVM_SYNC_X86_REGS (1UL << 0) #define KVM_SYNC_X86_SREGS (1UL << 1) #define KVM_SYNC_X86_EVENTS (1UL << 2) @@ -378,46 +466,581 @@ struct kvm_sync_regs { struct kvm_vcpu_events events; }; -#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) -#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) -#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) +#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) +#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) +#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3) +#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4) +#define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5) +#define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6) +#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) +#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8) +#define KVM_X86_QUIRK_IGNORE_GUEST_PAT (1 << 9) + +#define KVM_STATE_NESTED_FORMAT_VMX 0 +#define KVM_STATE_NESTED_FORMAT_SVM 1 #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 #define KVM_STATE_NESTED_EVMCS 0x00000004 +#define KVM_STATE_NESTED_MTF_PENDING 0x00000008 +#define KVM_STATE_NESTED_GIF_SET 0x00000100 #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_SMM_VMXON 0x00000002 -struct kvm_vmx_nested_state { +#define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000 + +#define KVM_STATE_NESTED_SVM_VMCB_SIZE 0x1000 + +#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 + +/* vendor-independent attributes for system fd (group 0) */ +#define KVM_X86_GRP_SYSTEM 0 +# define KVM_X86_XCOMP_GUEST_SUPP 0 + +/* vendor-specific groups and attributes for system fd */ +#define KVM_X86_GRP_SEV 1 +# define KVM_X86_SEV_VMSA_FEATURES 0 +# define KVM_X86_SNP_POLICY_BITS 1 + +struct kvm_vmx_nested_state_data { + __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; + __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; +}; + +struct kvm_vmx_nested_state_hdr { __u64 vmxon_pa; - __u64 vmcs_pa; + __u64 vmcs12_pa; struct { __u16 flags; } smm; + + __u16 pad; + + __u32 flags; + __u64 preemption_timer_deadline; +}; + +struct kvm_svm_nested_state_data { + /* Save area only used if KVM_STATE_NESTED_RUN_PENDING. */ + __u8 vmcb12[KVM_STATE_NESTED_SVM_VMCB_SIZE]; +}; + +struct kvm_svm_nested_state_hdr { + __u64 vmcb_pa; }; /* for KVM_CAP_NESTED_STATE */ struct kvm_nested_state { - /* KVM_STATE_* flags */ __u16 flags; - - /* 0 for VMX, 1 for SVM. */ __u16 format; - - /* 128 for SVM, 128 + VMCS size for VMX. */ __u32 size; union { - /* VMXON, VMCS */ - struct kvm_vmx_nested_state vmx; + struct kvm_vmx_nested_state_hdr vmx; + struct kvm_svm_nested_state_hdr svm; /* Pad the header to 128 bytes. */ __u8 pad[120]; - }; + } hdr; + + /* + * Define data region as 0 bytes to preserve backwards-compatibility + * with the old definition of kvm_nested_state in order to avoid + * changing KVM_{GET,PUT}_NESTED_STATE ioctl values.
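
A practical consequence of keeping the 128-byte header and appending the VMX/SVM payloads as flexible arrays is that userspace must size the buffer itself before calling the ioctl. A hedged sketch for a VMX guest, with the ioctl number taken from <linux/kvm.h> and error handling reduced to the minimum:

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

struct kvm_nested_state *get_vmx_nested_state(int vcpu_fd)
{
	/* 128-byte header plus vmcs12 and shadow vmcs12 pages */
	size_t sz = sizeof(struct kvm_nested_state) +
		    2 * KVM_STATE_NESTED_VMX_VMCS_SIZE;
	struct kvm_nested_state *state = calloc(1, sz);

	if (!state)
		return NULL;

	state->size = sz;	/* tell KVM how much room it may use */
	if (ioctl(vcpu_fd, KVM_GET_NESTED_STATE, state) < 0) {
		free(state);
		return NULL;
	}
	return state;
}
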
+ */ + union { + __DECLARE_FLEX_ARRAY(struct kvm_vmx_nested_state_data, vmx); + __DECLARE_FLEX_ARRAY(struct kvm_svm_nested_state_data, svm); + } data; +}; + +/* for KVM_CAP_PMU_EVENT_FILTER */ +struct kvm_pmu_event_filter { + __u32 action; + __u32 nevents; + __u32 fixed_counter_bitmap; + __u32 flags; + __u32 pad[4]; + __u64 events[]; +}; + +#define KVM_PMU_EVENT_ALLOW 0 +#define KVM_PMU_EVENT_DENY 1 + +#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS _BITUL(0) +#define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS) + +/* for KVM_CAP_MCE */ +struct kvm_x86_mce { + __u64 status; + __u64 addr; + __u64 misc; + __u64 mcg_status; + __u8 bank; + __u8 pad1[7]; + __u64 pad2[3]; +}; + +/* for KVM_CAP_XEN_HVM */ +#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) +#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) +#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) +#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) +#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) +#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) +#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8) + +#define KVM_XEN_MSR_MIN_INDEX 0x40000000u +#define KVM_XEN_MSR_MAX_INDEX 0x4fffffffu + +struct kvm_xen_hvm_config { + __u32 flags; + __u32 msr; + __u64 blob_addr_32; + __u64 blob_addr_64; + __u8 blob_size_32; + __u8 blob_size_64; + __u8 pad2[30]; +}; + +struct kvm_xen_hvm_attr { + __u16 type; + __u16 pad[3]; + union { + __u8 long_mode; + __u8 vector; + __u8 runstate_update_flag; + union { + __u64 gfn; +#define KVM_XEN_INVALID_GFN ((__u64)-1) + __u64 hva; + } shared_info; + struct { + __u32 send_port; + __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ + __u32 flags; +#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) +#define KVM_XEN_EVTCHN_UPDATE (1 << 1) +#define KVM_XEN_EVTCHN_RESET (1 << 2) + /* + * Events sent by the guest are either looped back to + * the guest itself (potentially on a different port#) + * or signalled via an eventfd. 
+ */ + union { + struct { + __u32 port; + __u32 vcpu; + __u32 priority; + } port; + struct { + __u32 port; /* Zero for eventfd */ + __s32 fd; + } eventfd; + __u32 padding[4]; + } deliver; + } evtchn; + __u32 xen_version; + __u64 pad[8]; + } u; +}; + + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 +#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 +#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ +#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ +#define KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA 0x6 + +struct kvm_xen_vcpu_attr { + __u16 type; + __u16 pad[3]; + union { + __u64 gpa; +#define KVM_XEN_INVALID_GPA ((__u64)-1) + __u64 hva; + __u64 pad[8]; + struct { + __u64 state; + __u64 state_entry_time; + __u64 time_running; + __u64 time_runnable; + __u64 time_blocked; + __u64 time_offline; + } runstate; + __u32 vcpu_id; + struct { + __u32 port; + __u32 priority; + __u64 expires_ns; + } timer; + __u8 vector; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 +#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 +#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA 0x9 + +/* Secure Encrypted Virtualization command */ +enum sev_cmd_id { + /* Guest initialization commands */ + KVM_SEV_INIT = 0, + KVM_SEV_ES_INIT, + /* Guest launch commands */ + KVM_SEV_LAUNCH_START, + KVM_SEV_LAUNCH_UPDATE_DATA, + KVM_SEV_LAUNCH_UPDATE_VMSA, + KVM_SEV_LAUNCH_SECRET, + KVM_SEV_LAUNCH_MEASURE, + KVM_SEV_LAUNCH_FINISH, + /* Guest migration commands (outgoing) */ + KVM_SEV_SEND_START, + KVM_SEV_SEND_UPDATE_DATA, + KVM_SEV_SEND_UPDATE_VMSA, + KVM_SEV_SEND_FINISH, + /* Guest migration commands (incoming) */ + KVM_SEV_RECEIVE_START, + KVM_SEV_RECEIVE_UPDATE_DATA, + KVM_SEV_RECEIVE_UPDATE_VMSA, + KVM_SEV_RECEIVE_FINISH, + /* Guest status and debug commands */ + KVM_SEV_GUEST_STATUS, + KVM_SEV_DBG_DECRYPT, + KVM_SEV_DBG_ENCRYPT, + /* Guest certificates commands */ + KVM_SEV_CERT_EXPORT, + /* Attestation report */ + KVM_SEV_GET_ATTESTATION_REPORT, + /* Guest Migration Extension */ + KVM_SEV_SEND_CANCEL, + + /* Second time is the charm; improved versions of the above ioctls. 
*/ + KVM_SEV_INIT2, + + /* SNP-specific commands */ + KVM_SEV_SNP_LAUNCH_START = 100, + KVM_SEV_SNP_LAUNCH_UPDATE, + KVM_SEV_SNP_LAUNCH_FINISH, + + KVM_SEV_NR_MAX, +}; + +struct kvm_sev_cmd { + __u32 id; + __u32 pad0; + __u64 data; + __u32 error; + __u32 sev_fd; +}; + +struct kvm_sev_init { + __u64 vmsa_features; + __u32 flags; + __u16 ghcb_version; + __u16 pad1; + __u32 pad2[8]; +}; + +struct kvm_sev_launch_start { + __u32 handle; + __u32 policy; + __u64 dh_uaddr; + __u32 dh_len; + __u32 pad0; + __u64 session_uaddr; + __u32 session_len; + __u32 pad1; +}; + +struct kvm_sev_launch_update_data { + __u64 uaddr; + __u32 len; + __u32 pad0; +}; + + +struct kvm_sev_launch_secret { + __u64 hdr_uaddr; + __u32 hdr_len; + __u32 pad0; + __u64 guest_uaddr; + __u32 guest_len; + __u32 pad1; + __u64 trans_uaddr; + __u32 trans_len; + __u32 pad2; +}; + +struct kvm_sev_launch_measure { + __u64 uaddr; + __u32 len; + __u32 pad0; +}; + +struct kvm_sev_guest_status { + __u32 handle; + __u32 policy; + __u32 state; +}; + +struct kvm_sev_dbg { + __u64 src_uaddr; + __u64 dst_uaddr; + __u32 len; + __u32 pad0; +}; + +struct kvm_sev_attestation_report { + __u8 mnonce[16]; + __u64 uaddr; + __u32 len; + __u32 pad0; +}; + +struct kvm_sev_send_start { + __u32 policy; + __u32 pad0; + __u64 pdh_cert_uaddr; + __u32 pdh_cert_len; + __u32 pad1; + __u64 plat_certs_uaddr; + __u32 plat_certs_len; + __u32 pad2; + __u64 amd_certs_uaddr; + __u32 amd_certs_len; + __u32 pad3; + __u64 session_uaddr; + __u32 session_len; + __u32 pad4; +}; + +struct kvm_sev_send_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u32 pad0; + __u64 guest_uaddr; + __u32 guest_len; + __u32 pad1; + __u64 trans_uaddr; + __u32 trans_len; + __u32 pad2; +}; + +struct kvm_sev_receive_start { + __u32 handle; + __u32 policy; + __u64 pdh_uaddr; + __u32 pdh_len; + __u32 pad0; + __u64 session_uaddr; + __u32 session_len; + __u32 pad1; +}; + +struct kvm_sev_receive_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u32 pad0; + __u64 guest_uaddr; + __u32 guest_len; + __u32 pad1; + __u64 trans_uaddr; + __u32 trans_len; + __u32 pad2; +}; + +struct kvm_sev_snp_launch_start { + __u64 policy; + __u8 gosvw[16]; + __u16 flags; + __u8 pad0[6]; + __u64 pad1[4]; +}; + +/* Kept in sync with firmware values for simplicity. */ +#define KVM_SEV_PAGE_TYPE_INVALID 0x0 +#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1 +#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3 +#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4 +#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5 +#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6 + +struct kvm_sev_snp_launch_update { + __u64 gfn_start; + __u64 uaddr; + __u64 len; + __u8 type; + __u8 pad0; + __u16 flags; + __u32 pad1; + __u64 pad2[4]; +}; + +#define KVM_SEV_SNP_ID_BLOCK_SIZE 96 +#define KVM_SEV_SNP_ID_AUTH_SIZE 4096 +#define KVM_SEV_SNP_FINISH_DATA_SIZE 32 + +struct kvm_sev_snp_launch_finish { + __u64 id_block_uaddr; + __u64 id_auth_uaddr; + __u8 id_block_en; + __u8 auth_key_en; + __u8 vcek_disabled; + __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE]; + __u8 pad0[3]; + __u16 flags; + __u64 pad1[4]; +}; + +#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) +#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) + +struct kvm_hyperv_eventfd { + __u32 conn_id; + __s32 fd; + __u32 flags; + __u32 padding[3]; +}; + +#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff +#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) + +/* + * Masked event layout. 
+ * Bits Description + * ---- ----------- + * 7:0 event select (low bits) + * 15:8 umask match + * 31:16 unused + * 35:32 event select (high bits) + * 36:54 unused + * 55 exclude bit + * 63:56 umask mask + */ + +#define KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, exclude) \ + (((event_select) & 0xFFULL) | (((event_select) & 0xF00ULL) << 24) | \ + (((mask) & 0xFFULL) << 56) | \ + (((match) & 0xFFULL) << 8) | \ + ((__u64)(!!(exclude)) << 55)) + +#define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \ + (__GENMASK_ULL(7, 0) | __GENMASK_ULL(35, 32)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (__GENMASK_ULL(63, 56)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (__GENMASK_ULL(15, 8)) +#define KVM_PMU_MASKED_ENTRY_EXCLUDE (_BITULL(55)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56) + +/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ +#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */ +#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ + +/* x86-specific KVM_EXIT_HYPERCALL flags. */ +#define KVM_EXIT_HYPERCALL_LONG_MODE _BITULL(0) + +#define KVM_X86_DEFAULT_VM 0 +#define KVM_X86_SW_PROTECTED_VM 1 +#define KVM_X86_SEV_VM 2 +#define KVM_X86_SEV_ES_VM 3 +#define KVM_X86_SNP_VM 4 +#define KVM_X86_TDX_VM 5 + +/* Trust Domain eXtension sub-ioctl() commands. */ +enum kvm_tdx_cmd_id { + KVM_TDX_CAPABILITIES = 0, + KVM_TDX_INIT_VM, + KVM_TDX_INIT_VCPU, + KVM_TDX_INIT_MEM_REGION, + KVM_TDX_FINALIZE_VM, + KVM_TDX_GET_CPUID, + + KVM_TDX_CMD_NR_MAX, +}; + +struct kvm_tdx_cmd { + /* enum kvm_tdx_cmd_id */ + __u32 id; + /* flags for sub-command. If sub-command doesn't use this, set zero. */ + __u32 flags; + /* + * data for each sub-command. An immediate value or a pointer to the + * actual data in process virtual address space. If sub-command doesn't + * use it, set zero. + */ + __u64 data; + /* + * Auxiliary error code. The sub-command may return TDX SEAMCALL + * status code in addition to -Exxx. + */ + __u64 hw_error; +}; + +struct kvm_tdx_capabilities { + __u64 supported_attrs; + __u64 supported_xfam; + + __u64 kernel_tdvmcallinfo_1_r11; + __u64 user_tdvmcallinfo_1_r11; + __u64 kernel_tdvmcallinfo_1_r12; + __u64 user_tdvmcallinfo_1_r12; + + __u64 reserved[250]; + + /* Configurable CPUID bits for userspace */ + struct kvm_cpuid2 cpuid; +}; + +struct kvm_tdx_init_vm { + __u64 attributes; + __u64 xfam; + __u64 mrconfigid[6]; /* sha384 digest */ + __u64 mrowner[6]; /* sha384 digest */ + __u64 mrownerconfig[6]; /* sha384 digest */ + + /* The total space for TD_PARAMS before the CPUIDs is 256 bytes */ + __u64 reserved[12]; + + /* + * Call KVM_TDX_INIT_VM before vcpu creation, thus before + * KVM_SET_CPUID2. + * This configuration supersedes KVM_SET_CPUID2s for VCPUs because the + * TDX module directly virtualizes those CPUIDs without VMM. The user + * space VMM, e.g. qemu, should make KVM_SET_CPUID2 consistent with + * those values. If it doesn't, KVM may have a wrong idea of the + * guest's vCPUIDs, and KVM may wrongly emulate CPUIDs or MSRs that + * the TDX module doesn't virtualize. + */ + struct kvm_cpuid2 cpuid; +}; + +#define KVM_TDX_MEASURE_MEMORY_REGION _BITULL(0) - __u8 data[0]; +struct kvm_tdx_init_mem_region { + __u64 source_addr; + __u64 gpa; + __u64 nr_pages; }; #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 19980ec1a316..a1efa7907a0b 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -8,6 +8,7 @@ * should be used to determine that a VM is running under KVM.
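
Stepping back to the TDX section above: struct kvm_tdx_cmd is the envelope userspace hands to KVM for every KVM_TDX_* sub-command. A sketch, under the assumption that these commands are multiplexed through the KVM_MEMORY_ENCRYPT_OP vm ioctl as the SEV commands are; the caller is expected to have sized caps and set caps->cpuid.nent beforehand:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int tdx_get_capabilities(int vm_fd, struct kvm_tdx_capabilities *caps)
{
	struct kvm_tdx_cmd cmd = {
		.id = KVM_TDX_CAPABILITIES,
		.data = (__u64)(uintptr_t)caps,	/* output buffer */
	};

	/* On failure, cmd.hw_error may carry a TDX SEAMCALL status. */
	return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
}
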
*/ #define KVM_CPUID_SIGNATURE 0x40000000 +#define KVM_SIGNATURE "KVMKVMKVM\0\0\0" /* This CPUID returns two feature bitmaps in eax, edx. Before enabling * a particular paravirtualization, the appropriate feature bit should @@ -29,6 +30,12 @@ #define KVM_FEATURE_PV_TLB_FLUSH 9 #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 #define KVM_FEATURE_PV_SEND_IPI 11 +#define KVM_FEATURE_POLL_CONTROL 12 +#define KVM_FEATURE_PV_SCHED_YIELD 13 +#define KVM_FEATURE_ASYNC_PF_INT 14 +#define KVM_FEATURE_MSI_EXT_DEST_ID 15 +#define KVM_FEATURE_HC_MAP_GPA_RANGE 16 +#define KVM_FEATURE_MIGRATION_CONTROL 17 #define KVM_HINTS_REALTIME 0 @@ -47,6 +54,10 @@ #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 #define MSR_KVM_STEAL_TIME 0x4b564d03 #define MSR_KVM_PV_EOI_EN 0x4b564d04 +#define MSR_KVM_POLL_CONTROL 0x4b564d05 +#define MSR_KVM_ASYNC_PF_INT 0x4b564d06 +#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07 +#define MSR_KVM_MIGRATION_CONTROL 0x4b564d08 struct kvm_steal_time { __u64 steal; @@ -78,6 +89,21 @@ struct kvm_clock_pairing { #define KVM_ASYNC_PF_ENABLED (1 << 0) #define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1) #define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2) +#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3) + +/* MSR_KVM_ASYNC_PF_INT */ +#define KVM_ASYNC_PF_VEC_MASK __GENMASK(7, 0) + +/* MSR_KVM_MIGRATION_CONTROL */ +#define KVM_MIGRATION_READY (1 << 0) + +/* KVM_HC_MAP_GPA_RANGE */ +#define KVM_MAP_GPA_RANGE_PAGE_SZ_4K 0 +#define KVM_MAP_GPA_RANGE_PAGE_SZ_2M (1 << 0) +#define KVM_MAP_GPA_RANGE_PAGE_SZ_1G (1 << 1) +#define KVM_MAP_GPA_RANGE_ENC_STAT(n) (n << 4) +#define KVM_MAP_GPA_RANGE_ENCRYPTED KVM_MAP_GPA_RANGE_ENC_STAT(1) +#define KVM_MAP_GPA_RANGE_DECRYPTED KVM_MAP_GPA_RANGE_ENC_STAT(0) /* Operations for KVM_HC_MMU_OP */ #define KVM_MMU_OP_WRITE_PTE 1 @@ -109,9 +135,13 @@ struct kvm_mmu_op_release_pt { #define KVM_PV_REASON_PAGE_READY 2 struct kvm_vcpu_pv_apf_data { - __u32 reason; - __u8 pad[60]; - __u32 enabled; + /* Used for 'page not present' events delivered via #PF */ + __u32 flags; + + /* Used for 'page ready' events delivered via interrupt notification */ + __u32 token; + + __u8 pad[56]; }; #define KVM_PV_EOI_BIT 0 diff --git a/arch/x86/include/uapi/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h index d62ac5db093b..a82c039d8e6a 100644 --- a/arch/x86/include/uapi/asm/ldt.h +++ b/arch/x86/include/uapi/asm/ldt.h @@ -12,7 +12,7 @@ /* The size of each LDT entry. */ #define LDT_ENTRY_SIZE 8 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Note on 64bit base and limit is ignored and you cannot set DS/ES/CS * not to the default values if you still want to do syscalls. This @@ -44,5 +44,5 @@ struct user_desc { #define MODIFY_LDT_CONTENTS_STACK 1 #define MODIFY_LDT_CONTENTS_CODE 2 -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_LDT_H */ diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 955c2a2e1cf9..cb6b48a7c22b 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -8,7 +8,8 @@ /* * Fields are zero when not available. Also, this struct is shared with * userspace mcelog and thus must keep existing fields at current offsets. - * Only add new fields to the end of the structure + * Only add new, shared fields to the end of the structure. + * Do not add vendor-specific fields. 
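
Tying the kvm_para.h constants above together: a guest identifies KVM by comparing KVM_SIGNATURE against the hypervisor CPUID leaf. A minimal detection sketch (a production version would first check the CPUID hypervisor bit, CPUID.1:ECX[31]):

#include <cpuid.h>
#include <string.h>

static int running_on_kvm(void)
{
	unsigned int eax, ebx, ecx, edx;
	char sig[12];

	/* Hypervisor leaves sit outside the basic CPUID range, so query
	 * KVM_CPUID_SIGNATURE directly instead of via __get_cpuid(). */
	__cpuid(0x40000000, eax, ebx, ecx, edx);
	memcpy(sig + 0, &ebx, 4);
	memcpy(sig + 4, &ecx, 4);
	memcpy(sig + 8, &edx, 4);

	return memcmp(sig, "KVMKVMKVM\0\0\0", 12) == 0;
}
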
*/ struct mce { __u64 status; /* Bank's MCi_STATUS MSR */ @@ -35,6 +36,7 @@ struct mce { __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ __u64 ppin; /* Protected Processor Inventory Number */ __u32 microcode; /* Microcode revision */ + __u64 kflags; /* Internal kernel use */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h index d4a8d0424bfb..ac1e6277212b 100644 --- a/arch/x86/include/uapi/asm/mman.h +++ b/arch/x86/include/uapi/asm/mman.h @@ -3,28 +3,7 @@ #define _ASM_X86_MMAN_H #define MAP_32BIT 0x40 /* only give out 32bit addresses */ - -#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -/* - * Take the 4 protection key bits out of the vma->vm_flags - * value and turn them in to the bits that we can put in - * to a pte. - * - * Only override these if Protection Keys are available - * (which is only on 64-bit). - */ -#define arch_vm_get_page_prot(vm_flags) __pgprot( \ - ((vm_flags) & VM_PKEY_BIT0 ? _PAGE_PKEY_BIT0 : 0) | \ - ((vm_flags) & VM_PKEY_BIT1 ? _PAGE_PKEY_BIT1 : 0) | \ - ((vm_flags) & VM_PKEY_BIT2 ? _PAGE_PKEY_BIT2 : 0) | \ - ((vm_flags) & VM_PKEY_BIT3 ? _PAGE_PKEY_BIT3 : 0)) - -#define arch_calc_vm_prot_bits(prot, key) ( \ - ((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \ - ((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \ - ((key) & 0x4 ? VM_PKEY_BIT2 : 0) | \ - ((key) & 0x8 ? VM_PKEY_BIT3 : 0)) -#endif +#define MAP_ABOVE4G 0x80 /* only map above 4GB */ #include <asm-generic/mman.h> diff --git a/arch/x86/include/uapi/asm/msgbuf.h b/arch/x86/include/uapi/asm/msgbuf.h index 90ab9a795b49..ac83e25bbf37 100644 --- a/arch/x86/include/uapi/asm/msgbuf.h +++ b/arch/x86/include/uapi/asm/msgbuf.h @@ -5,19 +5,22 @@ #if !defined(__x86_64__) || !defined(__ILP32__) #include <asm-generic/msgbuf.h> #else + +#include <asm/ipcbuf.h> + /* * The msqid64_ds structure for x86 architecture with x32 ABI. * * On x86-32 and x86-64 we can just use the generic definition, but - * x32 uses the same binary layout as x86_64, which is differnet + * x32 uses the same binary layout as x86_64, which is different * from other 32-bit architectures. 
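
On the MAP_ABOVE4G flag added to mman.h above: it asks mmap() to hand out only addresses above 4 GiB (it arrived with the x86 shadow stack enabling). A sketch that falls back to the raw value, since C library headers may not expose the flag yet:

#include <stdio.h>
#include <sys/mman.h>

#ifndef MAP_ABOVE4G
#define MAP_ABOVE4G 0x80	/* value from the header above */
#endif

int main(void)
{
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_ABOVE4G, -1, 0);

	if (p != MAP_FAILED)
		printf("mapped at %p\n", p);	/* expected above 4 GiB */
	return 0;
}
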
*/ struct msqid64_ds { struct ipc64_perm msg_perm; - __kernel_time_t msg_stime; /* last msgsnd time */ - __kernel_time_t msg_rtime; /* last msgrcv time */ - __kernel_time_t msg_ctime; /* last change time */ + __kernel_long_t msg_stime; /* last msgsnd time */ + __kernel_long_t msg_rtime; /* last msgrcv time */ + __kernel_long_t msg_ctime; /* last change time */ __kernel_ulong_t msg_cbytes; /* current number of bytes on queue */ __kernel_ulong_t msg_qnum; /* number of messages in queue */ __kernel_ulong_t msg_qbytes; /* max number of bytes on queue */ diff --git a/arch/x86/include/uapi/asm/msr.h b/arch/x86/include/uapi/asm/msr.h index e7516b402a00..4b8917ca28fe 100644 --- a/arch/x86/include/uapi/asm/msr.h +++ b/arch/x86/include/uapi/asm/msr.h @@ -2,7 +2,7 @@ #ifndef _UAPI_ASM_X86_MSR_H #define _UAPI_ASM_X86_MSR_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/ioctl.h> @@ -10,5 +10,5 @@ #define X86_IOC_RDMSR_REGS _IOWR('c', 0xA0, __u32[8]) #define X86_IOC_WRMSR_REGS _IOWR('c', 0xA1, __u32[8]) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_ASM_X86_MSR_H */ diff --git a/arch/x86/include/uapi/asm/mtrr.h b/arch/x86/include/uapi/asm/mtrr.h index 376563f2bac1..3a8a8eb8ac3a 100644 --- a/arch/x86/include/uapi/asm/mtrr.h +++ b/arch/x86/include/uapi/asm/mtrr.h @@ -81,14 +81,6 @@ typedef __u8 mtrr_type; #define MTRR_NUM_FIXED_RANGES 88 #define MTRR_MAX_VAR_RANGES 256 -struct mtrr_state_type { - struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; - mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; - unsigned char enabled; - unsigned char have_fixed; - mtrr_type def_type; -}; - #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) @@ -115,9 +107,9 @@ struct mtrr_state_type { #define MTRR_NUM_TYPES 7 /* - * Invalid MTRR memory type. mtrr_type_lookup() returns this value when - * MTRRs are disabled. Note, this value is allocated from the reserved - * values (0x7-0xff) of the MTRR memory types. + * Invalid MTRR memory type. No longer used outside of MTRR code. + * Note, this value is allocated from the reserved values (0x7-0xff) of + * the MTRR memory types. */ #define MTRR_TYPE_INVALID 0xff diff --git a/arch/x86/include/uapi/asm/param.h b/arch/x86/include/uapi/asm/param.h deleted file mode 100644 index 965d45427975..000000000000 --- a/arch/x86/include/uapi/asm/param.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/param.h> diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h index f3329cabce5c..7c9d2bb3833b 100644 --- a/arch/x86/include/uapi/asm/perf_regs.h +++ b/arch/x86/include/uapi/asm/perf_regs.h @@ -27,8 +27,32 @@ enum perf_event_x86_regs { PERF_REG_X86_R13, PERF_REG_X86_R14, PERF_REG_X86_R15, - + /* These are the limits for the GPRs. 
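
A note on the msr.h interface touched above: besides the X86_IOC_*MSR_REGS ioctls, the msr driver's per-CPU device nodes accept a plain pread() whose file offset selects the MSR index. A sketch (needs the msr module loaded and sufficient privileges):

#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

static int rdmsr_on_cpu0(uint32_t msr, uint64_t *val)
{
	int fd = open("/dev/cpu/0/msr", O_RDONLY);
	ssize_t n;

	if (fd < 0)
		return -1;

	n = pread(fd, val, sizeof(*val), msr);	/* offset = MSR index */
	close(fd);
	return n == (ssize_t)sizeof(*val) ? 0 : -1;
}
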
*/ PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1, PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1, + + /* These all need two bits set because they are 128bit */ + PERF_REG_X86_XMM0 = 32, + PERF_REG_X86_XMM1 = 34, + PERF_REG_X86_XMM2 = 36, + PERF_REG_X86_XMM3 = 38, + PERF_REG_X86_XMM4 = 40, + PERF_REG_X86_XMM5 = 42, + PERF_REG_X86_XMM6 = 44, + PERF_REG_X86_XMM7 = 46, + PERF_REG_X86_XMM8 = 48, + PERF_REG_X86_XMM9 = 50, + PERF_REG_X86_XMM10 = 52, + PERF_REG_X86_XMM11 = 54, + PERF_REG_X86_XMM12 = 56, + PERF_REG_X86_XMM13 = 58, + PERF_REG_X86_XMM14 = 60, + PERF_REG_X86_XMM15 = 62, + + /* These include both GPRs and XMMX registers */ + PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2, }; + +#define PERF_REG_EXTENDED_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1)) + #endif /* _ASM_X86_PERF_REGS_H */ diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index 5a6aac9fa41f..384e2cc6ac19 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -2,16 +2,42 @@ #ifndef _ASM_X86_PRCTL_H #define _ASM_X86_PRCTL_H -#define ARCH_SET_GS 0x1001 -#define ARCH_SET_FS 0x1002 -#define ARCH_GET_FS 0x1003 -#define ARCH_GET_GS 0x1004 +#define ARCH_SET_GS 0x1001 +#define ARCH_SET_FS 0x1002 +#define ARCH_GET_FS 0x1003 +#define ARCH_GET_GS 0x1004 -#define ARCH_GET_CPUID 0x1011 -#define ARCH_SET_CPUID 0x1012 +#define ARCH_GET_CPUID 0x1011 +#define ARCH_SET_CPUID 0x1012 -#define ARCH_MAP_VDSO_X32 0x2001 -#define ARCH_MAP_VDSO_32 0x2002 -#define ARCH_MAP_VDSO_64 0x2003 +#define ARCH_GET_XCOMP_SUPP 0x1021 +#define ARCH_GET_XCOMP_PERM 0x1022 +#define ARCH_REQ_XCOMP_PERM 0x1023 +#define ARCH_GET_XCOMP_GUEST_PERM 0x1024 +#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 + +#define ARCH_XCOMP_TILECFG 17 +#define ARCH_XCOMP_TILEDATA 18 + +#define ARCH_MAP_VDSO_X32 0x2001 +#define ARCH_MAP_VDSO_32 0x2002 +#define ARCH_MAP_VDSO_64 0x2003 + +/* Don't use 0x3001-0x3004 because of old glibcs */ + +#define ARCH_GET_UNTAG_MASK 0x4001 +#define ARCH_ENABLE_TAGGED_ADDR 0x4002 +#define ARCH_GET_MAX_TAG_BITS 0x4003 +#define ARCH_FORCE_TAGGED_SVA 0x4004 + +#define ARCH_SHSTK_ENABLE 0x5001 +#define ARCH_SHSTK_DISABLE 0x5002 +#define ARCH_SHSTK_LOCK 0x5003 +#define ARCH_SHSTK_UNLOCK 0x5004 +#define ARCH_SHSTK_STATUS 0x5005 + +/* ARCH_SHSTK_ features bits */ +#define ARCH_SHSTK_SHSTK (1ULL << 0) +#define ARCH_SHSTK_WRSS (1ULL << 1) #endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index bcba3c643e63..81d0c8bf1137 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h @@ -82,6 +82,10 @@ #define X86_CR3_PCID_BITS 12 #define X86_CR3_PCID_MASK (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL)) +#define X86_CR3_LAM_U57_BIT 61 /* Activate LAM for userspace, 62:57 bits masked */ +#define X86_CR3_LAM_U57 _BITULL(X86_CR3_LAM_U57_BIT) +#define X86_CR3_LAM_U48_BIT 62 /* Activate LAM for userspace, 62:48 bits masked */ +#define X86_CR3_LAM_U48 _BITULL(X86_CR3_LAM_U48_BIT) #define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */ #define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT) @@ -130,6 +134,19 @@ #define X86_CR4_SMAP _BITUL(X86_CR4_SMAP_BIT) #define X86_CR4_PKE_BIT 22 /* enable Protection Keys support */ #define X86_CR4_PKE _BITUL(X86_CR4_PKE_BIT) +#define X86_CR4_CET_BIT 23 /* enable Control-flow Enforcement Technology */ +#define X86_CR4_CET _BITUL(X86_CR4_CET_BIT) +#define X86_CR4_LASS_BIT 27 /* enable Linear Address Space Separation support */ +#define X86_CR4_LASS 
_BITUL(X86_CR4_LASS_BIT) +#define X86_CR4_LAM_SUP_BIT 28 /* LAM for supervisor pointers */ +#define X86_CR4_LAM_SUP _BITUL(X86_CR4_LAM_SUP_BIT) + +#ifdef __x86_64__ +#define X86_CR4_FRED_BIT 32 /* enable FRED kernel entry */ +#define X86_CR4_FRED _BITUL(X86_CR4_FRED_BIT) +#else +#define X86_CR4_FRED (0) +#endif /* * x86-64 Task Priority Register, CR8 diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h index 16074b9c93bb..5823584dea13 100644 --- a/arch/x86/include/uapi/asm/ptrace-abi.h +++ b/arch/x86/include/uapi/asm/ptrace-abi.h @@ -25,7 +25,7 @@ #else /* __i386__ */ -#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) +#if defined(__ASSEMBLER__) || defined(__FRAME_OFFSETS) /* * C ABI says these regs are callee-preserved. They aren't saved on kernel entry * unless syscall needs a complete, fully filled "struct pt_regs". @@ -57,7 +57,7 @@ #define EFLAGS 144 #define RSP 152 #define SS 160 -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* top of stack page */ #define FRAME_SIZE 168 @@ -87,7 +87,7 @@ #define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #endif diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h index 85165c0edafc..e0b5b4f6226b 100644 --- a/arch/x86/include/uapi/asm/ptrace.h +++ b/arch/x86/include/uapi/asm/ptrace.h @@ -7,7 +7,7 @@ #include <asm/processor-flags.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef __i386__ /* this struct defines the way the registers are stored on the @@ -81,6 +81,6 @@ struct pt_regs { -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _UAPI_ASM_X86_PTRACE_H */ diff --git a/arch/x86/include/uapi/asm/resource.h b/arch/x86/include/uapi/asm/resource.h deleted file mode 100644 index 04bc4db8921b..000000000000 --- a/arch/x86/include/uapi/asm/resource.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/resource.h> diff --git a/arch/x86/include/uapi/asm/sembuf.h b/arch/x86/include/uapi/asm/sembuf.h index 89de6cd9f0a7..71205b02a191 100644 --- a/arch/x86/include/uapi/asm/sembuf.h +++ b/arch/x86/include/uapi/asm/sembuf.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_SEMBUF_H #define _ASM_X86_SEMBUF_H +#include <asm/ipcbuf.h> + /* * The semid64_ds structure for x86 architecture. * Note extra padding because this structure is passed back and forth @@ -21,9 +23,9 @@ struct semid64_ds { unsigned long sem_ctime; /* last change time */ unsigned long sem_ctime_high; #else - __kernel_time_t sem_otime; /* last semop time */ + __kernel_long_t sem_otime; /* last semop time */ __kernel_ulong_t __unused1; - __kernel_time_t sem_ctime; /* last change time */ + __kernel_long_t sem_ctime; /* last change time */ __kernel_ulong_t __unused2; #endif __kernel_ulong_t sem_nsems; /* no. 
of semaphores in array */ diff --git a/arch/x86/include/uapi/asm/setup_data.h b/arch/x86/include/uapi/asm/setup_data.h new file mode 100644 index 000000000000..2671c4e1b3a0 --- /dev/null +++ b/arch/x86/include/uapi/asm/setup_data.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ASM_X86_SETUP_DATA_H +#define _UAPI_ASM_X86_SETUP_DATA_H + +/* setup_data/setup_indirect types */ +#define SETUP_NONE 0 +#define SETUP_E820_EXT 1 +#define SETUP_DTB 2 +#define SETUP_PCI 3 +#define SETUP_EFI 4 +#define SETUP_APPLE_PROPERTIES 5 +#define SETUP_JAILHOUSE 6 +#define SETUP_CC_BLOB 7 +#define SETUP_IMA 8 +#define SETUP_RNG_SEED 9 +#define SETUP_KEXEC_KHO 10 +#define SETUP_ENUM_MAX SETUP_KEXEC_KHO + +#define SETUP_INDIRECT (1<<31) +#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) + +#ifndef __ASSEMBLER__ + +#include <linux/types.h> + +/* extensible setup data list node */ +struct setup_data { + __u64 next; + __u32 type; + __u32 len; + __u8 data[]; +}; + +/* extensible setup indirect data node */ +struct setup_indirect { + __u32 type; + __u32 reserved; /* Reserved, must be set to zero. */ + __u64 len; + __u64 addr; +}; + +/* + * The E820 memory region entry of the boot protocol ABI: + */ +struct boot_e820_entry { + __u64 addr; + __u64 size; + __u32 type; +} __attribute__((packed)); + +/* + * The boot loader is passing platform information via this Jailhouse-specific + * setup data structure. + */ +struct jailhouse_setup_data { + struct { + __u16 version; + __u16 compatible_version; + } __attribute__((packed)) hdr; + struct { + __u16 pm_timer_address; + __u16 num_cpus; + __u64 pci_mmconfig_base; + __u32 tsc_khz; + __u32 apic_khz; + __u8 standard_ioapic; + __u8 cpu_ids[255]; + } __attribute__((packed)) v1; + struct { + __u32 flags; + } __attribute__((packed)) v2; +} __attribute__((packed)); + +/* + * IMA buffer setup data information from the previous kernel during kexec + */ +struct ima_setup_data { + __u64 addr; + __u64 size; +} __attribute__((packed)); + +/* + * Locations of kexec handover metadata + */ +struct kho_data { + __u64 fdt_addr; + __u64 fdt_size; + __u64 scratch_addr; + __u64 scratch_size; +} __attribute__((packed)); + +#endif /* __ASSEMBLER__ */ + +#endif /* _UAPI_ASM_X86_SETUP_DATA_H */ diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h new file mode 100644 index 000000000000..3c4d52072189 --- /dev/null +++ b/arch/x86/include/uapi/asm/sgx.h @@ -0,0 +1,238 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright(c) 2016-20 Intel Corporation. + */ +#ifndef _UAPI_ASM_X86_SGX_H +#define _UAPI_ASM_X86_SGX_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +/** + * enum sgx_page_flags - page control flags + * @SGX_PAGE_MEASURE: Measure the page contents with a sequence of + * ENCLS[EEXTEND] operations. 
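
Returning to the setup_data list type defined in setup_data.h above: each node carries the physical address of the next node, with 0 terminating the chain. A traversal sketch for an early-boot environment where physical addresses are directly dereferenceable; consume_rng_seed() is a hypothetical consumer, and real kernels map each node before touching it:

#include <asm/setup_data.h>

static void walk_setup_data(__u64 pa)	/* boot_params.hdr.setup_data */
{
	while (pa) {
		struct setup_data *sd = (struct setup_data *)(unsigned long)pa;

		if (sd->type == SETUP_RNG_SEED)
			consume_rng_seed(sd->data, sd->len);	/* hypothetical */

		pa = sd->next;	/* 0 terminates the chain */
	}
}
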
+ */
+enum sgx_page_flags {
+	SGX_PAGE_MEASURE	= 0x01,
+};
+
+#define SGX_MAGIC 0xA4
+
+#define SGX_IOC_ENCLAVE_CREATE \
+	_IOW(SGX_MAGIC, 0x00, struct sgx_enclave_create)
+#define SGX_IOC_ENCLAVE_ADD_PAGES \
+	_IOWR(SGX_MAGIC, 0x01, struct sgx_enclave_add_pages)
+#define SGX_IOC_ENCLAVE_INIT \
+	_IOW(SGX_MAGIC, 0x02, struct sgx_enclave_init)
+#define SGX_IOC_ENCLAVE_PROVISION \
+	_IOW(SGX_MAGIC, 0x03, struct sgx_enclave_provision)
+#define SGX_IOC_VEPC_REMOVE_ALL \
+	_IO(SGX_MAGIC, 0x04)
+#define SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS \
+	_IOWR(SGX_MAGIC, 0x05, struct sgx_enclave_restrict_permissions)
+#define SGX_IOC_ENCLAVE_MODIFY_TYPES \
+	_IOWR(SGX_MAGIC, 0x06, struct sgx_enclave_modify_types)
+#define SGX_IOC_ENCLAVE_REMOVE_PAGES \
+	_IOWR(SGX_MAGIC, 0x07, struct sgx_enclave_remove_pages)
+
+/**
+ * struct sgx_enclave_create - parameter structure for the
+ *                             %SGX_IOC_ENCLAVE_CREATE ioctl
+ * @src:	address for the SECS page data
+ */
+struct sgx_enclave_create {
+	__u64 src;
+};
+
+/**
+ * struct sgx_enclave_add_pages - parameter structure for the
+ *                                %SGX_IOC_ENCLAVE_ADD_PAGES ioctl
+ * @src:	start address for the page data
+ * @offset:	starting page offset
+ * @length:	length of the data (multiple of the page size)
+ * @secinfo:	address for the SECINFO data
+ * @flags:	page control flags
+ * @count:	number of bytes added (multiple of the page size)
+ */
+struct sgx_enclave_add_pages {
+	__u64 src;
+	__u64 offset;
+	__u64 length;
+	__u64 secinfo;
+	__u64 flags;
+	__u64 count;
+};
+
+/**
+ * struct sgx_enclave_init - parameter structure for the
+ *                           %SGX_IOC_ENCLAVE_INIT ioctl
+ * @sigstruct:	address for the SIGSTRUCT data
+ */
+struct sgx_enclave_init {
+	__u64 sigstruct;
+};
+
+/**
+ * struct sgx_enclave_provision - parameter structure for the
+ *                                %SGX_IOC_ENCLAVE_PROVISION ioctl
+ * @fd:	file handle of /dev/sgx_provision
+ */
+struct sgx_enclave_provision {
+	__u64 fd;
+};
+
+/**
+ * struct sgx_enclave_restrict_permissions - parameters for ioctl
+ *                                           %SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS
+ * @offset:	starting page offset (page aligned relative to enclave base
+ *		address defined in SECS)
+ * @length:	length of memory (multiple of the page size)
+ * @permissions: new permission bits for pages in range described by @offset
+ *		and @length
+ * @result:	(output) SGX result code of ENCLS[EMODPR] function
+ * @count:	(output) bytes successfully changed (multiple of page size)
+ */
+struct sgx_enclave_restrict_permissions {
+	__u64 offset;
+	__u64 length;
+	__u64 permissions;
+	__u64 result;
+	__u64 count;
+};
+
+/**
+ * struct sgx_enclave_modify_types - parameters for ioctl
+ *                                   %SGX_IOC_ENCLAVE_MODIFY_TYPES
+ * @offset:	starting page offset (page aligned relative to enclave base
+ *		address defined in SECS)
+ * @length:	length of memory (multiple of the page size)
+ * @page_type:	new type for pages in range described by @offset and @length
+ * @result:	(output) SGX result code of ENCLS[EMODT] function
+ * @count:	(output) bytes successfully changed (multiple of page size)
+ */
+struct sgx_enclave_modify_types {
+	__u64 offset;
+	__u64 length;
+	__u64 page_type;
+	__u64 result;
+	__u64 count;
+};
+
+/**
+ * struct sgx_enclave_remove_pages - %SGX_IOC_ENCLAVE_REMOVE_PAGES parameters
+ * @offset:	starting page offset (page aligned relative to enclave base
+ *		address defined in SECS)
+ * @length:	length of memory (multiple of the page size)
+ * @count:	(output) bytes successfully changed (multiple of page size)
+ *
+ * Regular (PT_REG) or TCS (PT_TCS) pages can be removed from an initialized
+ * enclave
if the system supports SGX2. First, the %SGX_IOC_ENCLAVE_MODIFY_TYPES + * ioctl() should be used to change the page type to PT_TRIM. After that + * succeeds ENCLU[EACCEPT] should be run from within the enclave and then + * %SGX_IOC_ENCLAVE_REMOVE_PAGES can be used to complete the page removal. + */ +struct sgx_enclave_remove_pages { + __u64 offset; + __u64 length; + __u64 count; +}; + +struct sgx_enclave_run; + +/** + * typedef sgx_enclave_user_handler_t - Exit handler function accepted by + * __vdso_sgx_enter_enclave() + * @rdi: RDI at the time of EEXIT, undefined on AEX + * @rsi: RSI at the time of EEXIT, undefined on AEX + * @rdx: RDX at the time of EEXIT, undefined on AEX + * @rsp: RSP (untrusted) at the time of EEXIT or AEX + * @r8: R8 at the time of EEXIT, undefined on AEX + * @r9: R9 at the time of EEXIT, undefined on AEX + * @run: The run instance given by the caller + * + * The register parameters contain the snapshot of their values at enclave + * exit. An invalid ENCLU function number will cause -EINVAL to be returned + * to the caller. + * + * Return: + * - <= 0: The given value is returned back to the caller. + * - > 0: ENCLU function to invoke, either EENTER or ERESUME. + */ +typedef int (*sgx_enclave_user_handler_t)(long rdi, long rsi, long rdx, + long rsp, long r8, long r9, + struct sgx_enclave_run *run); + +/** + * struct sgx_enclave_run - the execution context of __vdso_sgx_enter_enclave() + * @tcs: TCS used to enter the enclave + * @function: The last seen ENCLU function (EENTER, ERESUME or EEXIT) + * @exception_vector: The interrupt vector of the exception + * @exception_error_code: The exception error code pulled out of the stack + * @exception_addr: The address that triggered the exception + * @user_handler: User provided callback run on exception + * @user_data: Data passed to the user handler + * @reserved: Reserved for future extensions + * + * If @user_handler is provided, the handler will be invoked on all return paths + * of the normal flow. The user handler may transfer control, e.g. via a + * longjmp() call or a C++ exception, without returning to + * __vdso_sgx_enter_enclave(). + */ +struct sgx_enclave_run { + __u64 tcs; + __u32 function; + __u16 exception_vector; + __u16 exception_error_code; + __u64 exception_addr; + __u64 user_handler; + __u64 user_data; + __u8 reserved[216]; +}; + +/** + * typedef vdso_sgx_enter_enclave_t - Prototype for __vdso_sgx_enter_enclave(), + * a vDSO function to enter an SGX enclave. + * @rdi: Pass-through value for RDI + * @rsi: Pass-through value for RSI + * @rdx: Pass-through value for RDX + * @function: ENCLU function, must be EENTER or ERESUME + * @r8: Pass-through value for R8 + * @r9: Pass-through value for R9 + * @run: struct sgx_enclave_run, must be non-NULL + * + * NOTE: __vdso_sgx_enter_enclave() does not ensure full compliance with the + * x86-64 ABI, e.g. doesn't handle XSAVE state. Except for non-volatile + * general purpose registers, EFLAGS.DF, and RSP alignment, preserving/setting + * state in accordance with the x86-64 ABI is the responsibility of the enclave + * and its runtime, i.e. __vdso_sgx_enter_enclave() cannot be called from C + * code without careful consideration by both the enclave and its runtime. + * + * All general purpose registers except RAX, RBX and RCX are passed as-is to the + * enclave. RAX, RBX and RCX are consumed by EENTER and ERESUME and are loaded + * with @function, asynchronous exit pointer, and @run.tcs respectively. 
+ * + * RBP and the stack are used to anchor __vdso_sgx_enter_enclave() to the + * pre-enclave state, e.g. to retrieve @run.exception and @run.user_handler + * after an enclave exit. All other registers are available for use by the + * enclave and its runtime, e.g. an enclave can push additional data onto the + * stack (and modify RSP) to pass information to the optional user handler (see + * below). + * + * Most exceptions reported on ENCLU, including those that occur within the + * enclave, are fixed up and reported synchronously instead of being delivered + * via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are + * never fixed up and are always delivered via standard signals. On synchronously + * reported exceptions, -EFAULT is returned and details about the exception are + * recorded in @run.exception, the optional sgx_enclave_exception struct. + * + * Return: + * - 0: ENCLU function was successfully executed. + * - -EINVAL: Invalid ENCL number (neither EENTER nor ERESUME). + */ +typedef int (*vdso_sgx_enter_enclave_t)(unsigned long rdi, unsigned long rsi, + unsigned long rdx, unsigned int function, + unsigned long r8, unsigned long r9, + struct sgx_enclave_run *run); + +#endif /* _UAPI_ASM_X86_SGX_H */ diff --git a/arch/x86/include/uapi/asm/shmbuf.h b/arch/x86/include/uapi/asm/shmbuf.h index 644421f3823b..13775bfdfee2 100644 --- a/arch/x86/include/uapi/asm/shmbuf.h +++ b/arch/x86/include/uapi/asm/shmbuf.h @@ -5,20 +5,24 @@ #if !defined(__x86_64__) || !defined(__ILP32__) #include <asm-generic/shmbuf.h> #else + +#include <asm/ipcbuf.h> +#include <asm/posix_types.h> + /* * The shmid64_ds structure for x86 architecture with x32 ABI. * * On x86-32 and x86-64 we can just use the generic definition, but - * x32 uses the same binary layout as x86_64, which is differnet + * x32 uses the same binary layout as x86_64, which is different * from other 32-bit architectures. */ struct shmid64_ds { struct ipc64_perm shm_perm; /* operation perms */ - size_t shm_segsz; /* size of segment (bytes) */ - __kernel_time_t shm_atime; /* last attach time */ - __kernel_time_t shm_dtime; /* last detach time */ - __kernel_time_t shm_ctime; /* last change time */ + __kernel_size_t shm_segsz; /* size of segment (bytes) */ + __kernel_long_t shm_atime; /* last attach time */ + __kernel_long_t shm_dtime; /* last detach time */ + __kernel_long_t shm_ctime; /* last change time */ __kernel_pid_t shm_cpid; /* pid of creator */ __kernel_pid_t shm_lpid; /* pid of last operator */ __kernel_ulong_t shm_nattch; /* no. of current attaches */ diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index 844d60eb1882..d0d9b331d3a1 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h @@ -139,7 +139,7 @@ struct _fpstate_32 { * The 64-bit FPU frame. (FXSAVE format and later) * * Note1: If sw_reserved.magic1 == FP_XSTATE_MAGIC1 then the structure is - * larger: 'struct _xstate'. Note that 'struct _xstate' embedds + * larger: 'struct _xstate'. Note that 'struct _xstate' embeds * 'struct _fpstate' so that you can always assume the _fpstate portion * exists so that you can check the magic value. 
* diff --git a/arch/x86/include/uapi/asm/sigcontext32.h b/arch/x86/include/uapi/asm/sigcontext32.h index 6b18e88de8a6..7114801d0499 100644 --- a/arch/x86/include/uapi/asm/sigcontext32.h +++ b/arch/x86/include/uapi/asm/sigcontext32.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _ASM_X86_SIGCONTEXT32_H #define _ASM_X86_SIGCONTEXT32_H diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h index e5745d593dc7..1067efabf18b 100644 --- a/arch/x86/include/uapi/asm/signal.h +++ b/arch/x86/include/uapi/asm/signal.h @@ -2,9 +2,8 @@ #ifndef _UAPI_ASM_X86_SIGNAL_H #define _UAPI_ASM_X86_SIGNAL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> -#include <linux/time.h> #include <linux/compiler.h> /* Avoid too many header ordering problems. */ @@ -17,7 +16,7 @@ struct siginfo; typedef unsigned long sigset_t; #endif /* __KERNEL__ */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define SIGHUP 1 @@ -62,30 +61,6 @@ typedef unsigned long sigset_t; #define SIGRTMIN 32 #define SIGRTMAX _NSIG -/* - * SA_FLAGS values: - * - * SA_ONSTACK indicates that a registered stack_t will be used. - * SA_RESTART flag to get restarting signals (which were the default long ago) - * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. - * SA_RESETHAND clears the handler when the signal is delivered. - * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. - * SA_NODEFER prevents the current signal from being masked in the handler. - * - * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single - * Unix names RESETHAND and NODEFER respectively. - */ -#define SA_NOCLDSTOP 0x00000001u -#define SA_NOCLDWAIT 0x00000002u -#define SA_SIGINFO 0x00000004u -#define SA_ONSTACK 0x08000000u -#define SA_RESTART 0x10000000u -#define SA_NODEFER 0x40000000u -#define SA_RESETHAND 0x80000000u - -#define SA_NOMASK SA_NODEFER -#define SA_ONESHOT SA_RESETHAND - #define SA_RESTORER 0x04000000 #define MINSIGSTKSZ 2048 @@ -93,7 +68,7 @@ typedef unsigned long sigset_t; #include <asm-generic/signal-defs.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ # ifndef __KERNEL__ @@ -128,9 +103,9 @@ struct sigaction { typedef struct sigaltstack { void __user *ss_sp; int ss_flags; - size_t ss_size; + __kernel_size_t ss_size; } stack_t; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_ASM_X86_SIGNAL_H */ diff --git a/arch/x86/include/uapi/asm/socket.h b/arch/x86/include/uapi/asm/socket.h deleted file mode 100644 index 6b71384b9d8b..000000000000 --- a/arch/x86/include/uapi/asm/socket.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/socket.h> diff --git a/arch/x86/include/uapi/asm/sockios.h b/arch/x86/include/uapi/asm/sockios.h deleted file mode 100644 index def6d4746ee7..000000000000 --- a/arch/x86/include/uapi/asm/sockios.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/sockios.h> diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index a9731f8a480f..650e3256ea7d 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -29,6 +29,7 @@ #define SVM_EXIT_WRITE_DR6 0x036 #define SVM_EXIT_WRITE_DR7 0x037 #define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_LAST_EXCP 0x05f #define SVM_EXIT_INTR 0x060 #define SVM_EXIT_NMI 0x061 #define SVM_EXIT_SMI 0x062 @@ -75,9 +76,63 @@ #define SVM_EXIT_MWAIT 0x08b #define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_XSETBV 0x08d +#define SVM_EXIT_RDPRU 0x08e +#define SVM_EXIT_EFER_WRITE_TRAP 0x08f 
+#define SVM_EXIT_CR0_WRITE_TRAP 0x090 +#define SVM_EXIT_CR1_WRITE_TRAP 0x091 +#define SVM_EXIT_CR2_WRITE_TRAP 0x092 +#define SVM_EXIT_CR3_WRITE_TRAP 0x093 +#define SVM_EXIT_CR4_WRITE_TRAP 0x094 +#define SVM_EXIT_CR5_WRITE_TRAP 0x095 +#define SVM_EXIT_CR6_WRITE_TRAP 0x096 +#define SVM_EXIT_CR7_WRITE_TRAP 0x097 +#define SVM_EXIT_CR8_WRITE_TRAP 0x098 +#define SVM_EXIT_CR9_WRITE_TRAP 0x099 +#define SVM_EXIT_CR10_WRITE_TRAP 0x09a +#define SVM_EXIT_CR11_WRITE_TRAP 0x09b +#define SVM_EXIT_CR12_WRITE_TRAP 0x09c +#define SVM_EXIT_CR13_WRITE_TRAP 0x09d +#define SVM_EXIT_CR14_WRITE_TRAP 0x09e +#define SVM_EXIT_CR15_WRITE_TRAP 0x09f +#define SVM_EXIT_INVPCID 0x0a2 +#define SVM_EXIT_BUS_LOCK 0x0a5 +#define SVM_EXIT_IDLE_HLT 0x0a6 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 +#define SVM_EXIT_VMGEXIT 0x403 + +/* SEV-ES software-defined VMGEXIT events */ +#define SVM_VMGEXIT_MMIO_READ 0x80000001 +#define SVM_VMGEXIT_MMIO_WRITE 0x80000002 +#define SVM_VMGEXIT_NMI_COMPLETE 0x80000003 +#define SVM_VMGEXIT_AP_HLT_LOOP 0x80000004 +#define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005 +#define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0 +#define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1 +#define SVM_VMGEXIT_PSC 0x80000010 +#define SVM_VMGEXIT_GUEST_REQUEST 0x80000011 +#define SVM_VMGEXIT_EXT_GUEST_REQUEST 0x80000012 +#define SVM_VMGEXIT_AP_CREATION 0x80000013 +#define SVM_VMGEXIT_AP_CREATE_ON_INIT 0 +#define SVM_VMGEXIT_AP_CREATE 1 +#define SVM_VMGEXIT_AP_DESTROY 2 +#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 +#define SVM_VMGEXIT_SAVIC 0x8000001a +#define SVM_VMGEXIT_SAVIC_REGISTER_GPA 0 +#define SVM_VMGEXIT_SAVIC_UNREGISTER_GPA 1 +#define SVM_VMGEXIT_SAVIC_SELF_GPA ~0ULL +#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd +#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe +#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ + /* SW_EXITINFO1[3:0] */ \ + (((((u64)reason_set) & 0xf)) | \ + /* SW_EXITINFO1[11:4] */ \ + ((((u64)reason_code) & 0xff) << 4)) +#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff + +/* Exit code reserved for hypervisor/software use */ +#define SVM_EXIT_SW 0xf0000000 #define SVM_EXIT_ERR -1 @@ -170,9 +225,27 @@ { SVM_EXIT_MONITOR, "monitor" }, \ { SVM_EXIT_MWAIT, "mwait" }, \ { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \ + { SVM_EXIT_CR0_WRITE_TRAP, "write_cr0_trap" }, \ + { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ + { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ + { SVM_EXIT_INVPCID, "invpcid" }, \ + { SVM_EXIT_BUS_LOCK, "buslock" }, \ + { SVM_EXIT_IDLE_HLT, "idle-halt" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ + { SVM_EXIT_VMGEXIT, "vmgexit" }, \ + { SVM_VMGEXIT_MMIO_READ, "vmgexit_mmio_read" }, \ + { SVM_VMGEXIT_MMIO_WRITE, "vmgexit_mmio_write" }, \ + { SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \ + { SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \ + { SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \ + { SVM_VMGEXIT_PSC, "vmgexit_page_state_change" }, \ + { SVM_VMGEXIT_GUEST_REQUEST, "vmgexit_guest_request" }, \ + { SVM_VMGEXIT_EXT_GUEST_REQUEST, "vmgexit_ext_guest_request" }, \ + { SVM_VMGEXIT_AP_CREATION, "vmgexit_ap_creation" }, \ + { SVM_VMGEXIT_HV_FEATURES, "vmgexit_hypervisor_feature" }, \ { SVM_EXIT_ERR, "invalid_guest_state" } diff --git a/arch/x86/include/uapi/asm/termbits.h b/arch/x86/include/uapi/asm/termbits.h deleted file mode 100644 index 
3935b106de79..000000000000 --- a/arch/x86/include/uapi/asm/termbits.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/termbits.h> diff --git a/arch/x86/include/uapi/asm/termios.h b/arch/x86/include/uapi/asm/termios.h deleted file mode 100644 index 280d78a9d966..000000000000 --- a/arch/x86/include/uapi/asm/termios.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/termios.h> diff --git a/arch/x86/include/uapi/asm/types.h b/arch/x86/include/uapi/asm/types.h deleted file mode 100644 index df55e1ddb0c9..000000000000 --- a/arch/x86/include/uapi/asm/types.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_TYPES_H -#define _ASM_X86_TYPES_H - -#include <asm-generic/types.h> - -#endif /* _ASM_X86_TYPES_H */ diff --git a/arch/x86/include/uapi/asm/unistd.h b/arch/x86/include/uapi/asm/unistd.h index 30d7d04d72d6..be5e2e747f50 100644 --- a/arch/x86/include/uapi/asm/unistd.h +++ b/arch/x86/include/uapi/asm/unistd.h @@ -2,7 +2,14 @@ #ifndef _UAPI_ASM_X86_UNISTD_H #define _UAPI_ASM_X86_UNISTD_H -/* x32 syscall flag bit */ +/* + * x32 syscall flag bit. Some user programs expect syscall NR macros + * and __X32_SYSCALL_BIT to have type int, even though syscall numbers + * are, for practical purposes, unsigned long. + * + * Fortunately, expressions like (nr & ~__X32_SYSCALL_BIT) do the right + * thing regardless. + */ #define __X32_SYSCALL_BIT 0x40000000 #ifndef __KERNEL__ diff --git a/arch/x86/include/uapi/asm/vm86.h b/arch/x86/include/uapi/asm/vm86.h index d2ee4e307ef8..18909b8050bc 100644 --- a/arch/x86/include/uapi/asm/vm86.h +++ b/arch/x86/include/uapi/asm/vm86.h @@ -97,7 +97,7 @@ struct revectored_struct { struct vm86_struct { struct vm86_regs regs; unsigned long flags; - unsigned long screen_bitmap; + unsigned long screen_bitmap; /* unused, preserved by vm86() */ unsigned long cpu_type; struct revectored_struct int_revectored; struct revectored_struct int21_revectored; @@ -106,7 +106,7 @@ struct vm86_struct { /* * flags masks */ -#define VM86_SCREEN_BITMAP 0x0001 +#define VM86_SCREEN_BITMAP 0x0001 /* no longer supported */ struct vm86plus_info_struct { unsigned long force_return_for_pic:1; diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index f0b0c90dd398..1baa86dfe029 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -27,12 +27,16 @@ #define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 +#define VMX_EXIT_REASONS_SGX_ENCLAVE_MODE 0x08000000 #define EXIT_REASON_EXCEPTION_NMI 0 #define EXIT_REASON_EXTERNAL_INTERRUPT 1 #define EXIT_REASON_TRIPLE_FAULT 2 +#define EXIT_REASON_INIT_SIGNAL 3 +#define EXIT_REASON_SIPI_SIGNAL 4 +#define EXIT_REASON_OTHER_SMI 6 -#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_INTERRUPT_WINDOW 7 #define EXIT_REASON_NMI_WINDOW 8 #define EXIT_REASON_TASK_SWITCH 9 #define EXIT_REASON_CPUID 10 @@ -85,12 +89,22 @@ #define EXIT_REASON_PML_FULL 62 #define EXIT_REASON_XSAVES 63 #define EXIT_REASON_XRSTORS 64 +#define EXIT_REASON_UMWAIT 67 +#define EXIT_REASON_TPAUSE 68 +#define EXIT_REASON_BUS_LOCK 74 +#define EXIT_REASON_NOTIFY 75 +#define EXIT_REASON_SEAMCALL 76 +#define EXIT_REASON_TDCALL 77 +#define EXIT_REASON_MSR_READ_IMM 84 +#define EXIT_REASON_MSR_WRITE_IMM 85 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ - { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ + { EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \ + { 
EXIT_REASON_SIPI_SIGNAL, "SIPI_SIGNAL" }, \
+	{ EXIT_REASON_INTERRUPT_WINDOW, "INTERRUPT_WINDOW" }, \
 	{ EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \
 	{ EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
 	{ EXIT_REASON_CPUID, "CPUID" }, \
@@ -142,7 +156,17 @@
 	{ EXIT_REASON_RDSEED, "RDSEED" }, \
 	{ EXIT_REASON_PML_FULL, "PML_FULL" }, \
 	{ EXIT_REASON_XSAVES, "XSAVES" }, \
-	{ EXIT_REASON_XRSTORS, "XRSTORS" }
+	{ EXIT_REASON_XRSTORS, "XRSTORS" }, \
+	{ EXIT_REASON_UMWAIT, "UMWAIT" }, \
+	{ EXIT_REASON_TPAUSE, "TPAUSE" }, \
+	{ EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \
+	{ EXIT_REASON_NOTIFY, "NOTIFY" }, \
+	{ EXIT_REASON_TDCALL, "TDCALL" }, \
+	{ EXIT_REASON_MSR_READ_IMM, "MSR_READ_IMM" }, \
+	{ EXIT_REASON_MSR_WRITE_IMM, "MSR_WRITE_IMM" }
+
+#define VMX_EXIT_REASON_FLAGS \
+	{ VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }
 
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL       2
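The sembuf.h hunk above shows the y2038 pattern used throughout the 32-bit UAPI: each 64-bit timestamp is split into an unsigned long low half plus a matching *_high half. A minimal sketch of how a consumer reassembles the value (the helper name is illustrative, not part of the header):

#include <stdint.h>

/* Rebuild a 64-bit timestamp from the split low/high unsigned longs of
 * the 32-bit semid64_ds layout (illustrative helper, not a kernel API). */
static inline int64_t semid64_time(unsigned long low, unsigned long high)
{
	return ((int64_t)high << 32) | (uint32_t)low;
}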
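struct setup_data in the new setup_data.h is a physically linked list: ->next holds the physical address of the following node, and a SETUP_INDIRECT node's payload is a struct setup_indirect describing the real data elsewhere. A hedged sketch of a walker; map_phys() and visit() are hypothetical callbacks standing in for the caller's mapping primitive (the kernel itself uses early_memremap() here) and consumer:

#include <asm/setup_data.h>	/* struct setup_data, struct setup_indirect */

typedef const void *(*map_phys_t)(__u64 paddr, __u64 len);	/* hypothetical */
typedef void (*visit_t)(__u32 type, const void *data, __u64 len);

static void walk_setup_data(__u64 pa_data, map_phys_t map_phys, visit_t visit)
{
	while (pa_data) {
		/* A real walker maps the header first, then remaps once
		 * sd->len is known; one call keeps the sketch short. */
		const struct setup_data *sd = map_phys(pa_data, sizeof(*sd));
		__u32 type = sd->type;
		const void *data = sd->data;
		__u64 len = sd->len;

		if (type == SETUP_INDIRECT) {
			const struct setup_indirect *ind = (const void *)sd->data;

			/* The indirect node points at the real payload. */
			type = ind->type;
			len  = ind->len;
			data = map_phys(ind->addr, ind->len);
		}
		visit(type, data, len);
		pa_data = sd->next;	/* physical address; 0 ends the list */
	}
}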
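Taken together, the first three sgx.h ioctls implement the enclave build flow: ECREATE the SECS, EADD/EEXTEND the initial pages, then EINIT with the SIGSTRUCT. A hedged user-space sketch, assuming fd is an open /dev/sgx_enclave descriptor and that secs, pages, secinfo and sigstruct were prepared by the caller:

#include <stdint.h>
#include <sys/ioctl.h>
#include <asm/sgx.h>

static int build_enclave(int fd, void *secs, void *pages, uint64_t npages,
			 void *secinfo, void *sigstruct)
{
	struct sgx_enclave_create create = {
		.src = (uintptr_t)secs,
	};
	struct sgx_enclave_add_pages add = {
		.src     = (uintptr_t)pages,
		.offset  = 0,			/* page offset inside the enclave */
		.length  = npages * 4096,	/* must be a page-size multiple */
		.secinfo = (uintptr_t)secinfo,
		.flags   = SGX_PAGE_MEASURE,	/* EEXTEND into the measurement */
	};
	struct sgx_enclave_init init = {
		.sigstruct = (uintptr_t)sigstruct,
	};

	if (ioctl(fd, SGX_IOC_ENCLAVE_CREATE, &create) ||
	    ioctl(fd, SGX_IOC_ENCLAVE_ADD_PAGES, &add))
		return -1;
	/* add.count now reports how many bytes were actually added. */
	return ioctl(fd, SGX_IOC_ENCLAVE_INIT, &init);
}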
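The sgx_enclave_remove_pages comment above describes a three-step SGX2 protocol. A sketch of the untrusted side; PT_TRIM's numeric value (4) comes from the SDM rather than this header, and eaccept_trimmed() stands in for the enclave running ENCLU[EACCEPT] on each trimmed page — both are assumptions, not part of the UAPI:

#include <sys/ioctl.h>
#include <asm/sgx.h>

#define PT_TRIM	4	/* architectural page type; not in the UAPI header */

int eaccept_trimmed(__u64 offset, __u64 length);	/* hypothetical, in-enclave */

static int trim_and_remove(int fd, __u64 offset, __u64 length)
{
	struct sgx_enclave_modify_types mt = {
		.offset = offset,
		.length = length,
		.page_type = PT_TRIM,
	};
	struct sgx_enclave_remove_pages rm = {
		.offset = offset,
		.length = length,
	};

	/* Step 1: EMODT the pages to PT_TRIM. */
	if (ioctl(fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &mt) || mt.result)
		return -1;
	/* Step 2: the enclave must EACCEPT every trimmed page. */
	if (eaccept_trimmed(offset, length))
		return -1;
	/* Step 3: complete the removal. */
	return ioctl(fd, SGX_IOC_ENCLAVE_REMOVE_PAGES, &rm);
}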
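The vdso_sgx_enter_enclave_t prototype at the end of sgx.h is meant to be applied to the __vdso_sgx_enter_enclave symbol. A hedged sketch of a minimal entry, assuming vdso_sym() is the caller's own ELF lookup helper (resolving symbols from getauxval(AT_SYSINFO_EHDR) is out of scope here); 2 is the architectural ENCLU leaf number for EENTER:

#include <string.h>
#include <asm/sgx.h>

#define EENTER	2	/* ENCLU leaf; architectural constant, not in this header */

void *vdso_sym(const char *name);	/* hypothetical vDSO symbol lookup */

static int enter_enclave(__u64 tcs)
{
	vdso_sgx_enter_enclave_t enclave_entry =
		(vdso_sgx_enter_enclave_t)vdso_sym("__vdso_sgx_enter_enclave");
	struct sgx_enclave_run run;

	memset(&run, 0, sizeof(run));
	run.tcs = tcs;	/* user_handler == 0: exceptions surface as -EFAULT */

	return enclave_entry(0, 0, 0, EENTER, 0, 0, &run);
}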
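SVM_VMGEXIT_TERM_REASON in svm.h packs a 4-bit reason set into SW_EXITINFO1[3:0] and an 8-bit reason code into SW_EXITINFO1[11:4]. A worked check of the encoding; the u64 typedef is an assumption here, since the macro body uses that kernel-internal type:

typedef unsigned long long u64;	/* assumed: kernel type, not defined in UAPI */
#include <asm/svm.h>

/* reason_set 1, reason_code 0x2a:
 *   (1 & 0xf) | ((0x2a & 0xff) << 4) = 0x001 | 0x2a0 = 0x2a1
 */
_Static_assert(SVM_VMGEXIT_TERM_REASON(1, 0x2a) == 0x2a1,
	       "termination reason encoding");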
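The reworded __X32_SYSCALL_BIT comment in unistd.h is easy to demonstrate: the flag tests and strips cleanly whether the syscall number is held in an int or an unsigned long. A small sketch with illustrative helper names:

#include <asm/unistd.h>

static inline int syscall_is_x32(unsigned long nr)
{
	return (nr & __X32_SYSCALL_BIT) != 0;
}

static inline unsigned long syscall_base_nr(unsigned long nr)
{
	/* (nr & ~__X32_SYSCALL_BIT) is well-defined for both int and
	 * unsigned long operands; the cast just makes that explicit. */
	return nr & ~(unsigned long)__X32_SYSCALL_BIT;
}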
