diff options
Diffstat (limited to 'arch/x86/include')
38 files changed, 457 insertions, 292 deletions
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 1b020381ab38..c096624137ae 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -218,10 +218,9 @@ static inline int alternatives_text_reserved(void *start, void *end) #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ output, input...) \ { \ - register void *__sp asm(_ASM_SP); \ asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ "call %P[new2]", feature2) \ - : output, "+r" (__sp) \ + : output, ASM_CALL_CONSTRAINT \ : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ [new2] "i" (newfunc2), ## input); \ } diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 676ee5807d86..b0dc91f4bedc 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -11,10 +11,12 @@ # define __ASM_FORM_COMMA(x) " " #x "," #endif -#ifdef CONFIG_X86_32 +#ifndef __x86_64__ +/* 32 bit */ # define __ASM_SEL(a,b) __ASM_FORM(a) # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) #else +/* 64 bit */ # define __ASM_SEL(a,b) __ASM_FORM(b) # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) #endif @@ -132,4 +134,15 @@ /* For C file, we already have NOKPROBE_SYMBOL macro */ #endif +#ifndef __ASSEMBLY__ +/* + * This output constraint should be used for any inline asm which has a "call" + * instruction. Otherwise the asm may be inserted before the frame pointer + * gets set up by the containing function. If you forget to do this, objtool + * may print a "call without frame pointer save/setup" warning. + */ +register unsigned long current_stack_pointer asm(_ASM_SP); +#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) +#endif + #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 8b4140f6724f..cb9a1af109b4 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -7,6 +7,4 @@ void clflush_cache_range(void *addr, unsigned int size); -#define mmio_flush_range(addr, size) clflush_cache_range(addr, size) - #endif /* _ASM_X86_CACHEFLUSH_H */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 42bbbf0f173d..2519c6c801c9 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -288,6 +288,7 @@ #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ +#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 1a2ba368da39..9d0e13738ed3 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -121,7 +121,6 @@ static inline int desc_empty(const void *ptr) #define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt)) #define store_gdt(dtr) native_store_gdt(dtr) -#define store_idt(dtr) native_store_idt(dtr) #define store_tr(tr) (tr = native_store_tr()) #define load_TLS(t, cpu) native_load_tls(t, cpu) @@ -228,7 +227,7 @@ static inline void native_store_gdt(struct desc_ptr *dtr) asm volatile("sgdt %0":"=m" (*dtr)); } -static inline void native_store_idt(struct desc_ptr *dtr) +static inline void store_idt(struct desc_ptr *dtr) { asm volatile("sidt %0":"=m" (*dtr)); } diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 554cdb205d17..e3221ffa304e 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -23,11 +23,9 @@ /* * High level FPU state handling functions: */ -extern void fpu__activate_curr(struct fpu *fpu); -extern void fpu__activate_fpstate_read(struct fpu *fpu); -extern void fpu__activate_fpstate_write(struct fpu *fpu); -extern void fpu__current_fpstate_write_begin(void); -extern void fpu__current_fpstate_write_end(void); +extern void fpu__initialize(struct fpu *fpu); +extern void fpu__prepare_read(struct fpu *fpu); +extern void fpu__prepare_write(struct fpu *fpu); extern void fpu__save(struct fpu *fpu); extern void fpu__restore(struct fpu *fpu); extern int fpu__restore_sig(void __user *buf, int ia32_frame); @@ -120,20 +118,11 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu); err; \ }) -#define check_insn(insn, output, input...) \ -({ \ - int err; \ +#define kernel_insn(insn, output, input...) \ asm volatile("1:" #insn "\n\t" \ "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err), output \ - : "0"(0), input); \ - err; \ -}) + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \ + : output : input) static inline int copy_fregs_to_user(struct fregs_state __user *fx) { @@ -153,20 +142,16 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx) static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) { - int err; - if (IS_ENABLED(CONFIG_X86_32)) { - err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } else { if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) { - err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); + kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } else { /* See comment in copy_fxregs_to_kernel() below. */ - err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx)); + kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx)); } } - /* Copying from a kernel buffer to FPU registers should never fail: */ - WARN_ON_FPU(err); } static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) @@ -183,9 +168,7 @@ static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) static inline void copy_kernel_to_fregs(struct fregs_state *fx) { - int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - - WARN_ON_FPU(err); + kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } static inline int copy_user_to_fregs(struct fregs_state __user *fx) @@ -281,18 +264,13 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact * XSAVE area format. */ -#define XSTATE_XRESTORE(st, lmask, hmask, err) \ +#define XSTATE_XRESTORE(st, lmask, hmask) \ asm volatile(ALTERNATIVE(XRSTOR, \ XRSTORS, X86_FEATURE_XSAVES) \ "\n" \ - "xor %[err], %[err]\n" \ "3:\n" \ - ".pushsection .fixup,\"ax\"\n" \ - "4: movl $-2, %[err]\n" \ - "jmp 3b\n" \ - ".popsection\n" \ - _ASM_EXTABLE(661b, 4b) \ - : [err] "=r" (err) \ + _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\ + : \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") @@ -336,7 +314,10 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) else XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); - /* We should never fault when copying from a kernel buffer: */ + /* + * We should never fault when copying from a kernel buffer, and the FPU + * state we set at boot time should be valid. + */ WARN_ON_FPU(err); } @@ -350,7 +331,7 @@ static inline void copy_xregs_to_kernel(struct xregs_state *xstate) u32 hmask = mask >> 32; int err; - WARN_ON(!alternatives_patched); + WARN_ON_FPU(!alternatives_patched); XSTATE_XSAVE(xstate, lmask, hmask, err); @@ -365,12 +346,8 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) { u32 lmask = mask; u32 hmask = mask >> 32; - int err; - - XSTATE_XRESTORE(xstate, lmask, hmask, err); - /* We should never fault when copying from a kernel buffer: */ - WARN_ON_FPU(err); + XSTATE_XRESTORE(xstate, lmask, hmask); } /* @@ -526,38 +503,17 @@ static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu) */ static inline void fpregs_deactivate(struct fpu *fpu) { - WARN_ON_FPU(!fpu->fpregs_active); - - fpu->fpregs_active = 0; this_cpu_write(fpu_fpregs_owner_ctx, NULL); trace_x86_fpu_regs_deactivated(fpu); } static inline void fpregs_activate(struct fpu *fpu) { - WARN_ON_FPU(fpu->fpregs_active); - - fpu->fpregs_active = 1; this_cpu_write(fpu_fpregs_owner_ctx, fpu); trace_x86_fpu_regs_activated(fpu); } /* - * The question "does this thread have fpu access?" - * is slightly racy, since preemption could come in - * and revoke it immediately after the test. - * - * However, even in that very unlikely scenario, - * we can just assume we have FPU access - typically - * to save the FP state - we'll just take a #NM - * fault and get the FPU access back. - */ -static inline int fpregs_active(void) -{ - return current->thread.fpu.fpregs_active; -} - -/* * FPU state switching for scheduling. * * This is a two-stage process: @@ -571,14 +527,13 @@ static inline int fpregs_active(void) static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - if (old_fpu->fpregs_active) { + if (old_fpu->initialized) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else old_fpu->last_cpu = cpu; /* But leave fpu_fpregs_owner_ctx! */ - old_fpu->fpregs_active = 0; trace_x86_fpu_regs_deactivated(old_fpu); } else old_fpu->last_cpu = -1; @@ -595,7 +550,7 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu) static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu) { bool preload = static_cpu_has(X86_FEATURE_FPU) && - new_fpu->fpstate_active; + new_fpu->initialized; if (preload) { if (!fpregs_state_valid(new_fpu, cpu)) @@ -617,8 +572,7 @@ static inline void user_fpu_begin(void) struct fpu *fpu = ¤t->thread.fpu; preempt_disable(); - if (!fpregs_active()) - fpregs_activate(fpu); + fpregs_activate(fpu); preempt_enable(); } diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 3c80f5b9c09d..a1520575d86b 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -68,6 +68,9 @@ struct fxregs_state { /* Default value for fxregs_state.mxcsr: */ #define MXCSR_DEFAULT 0x1f80 +/* Copy both mxcsr & mxcsr_flags with a single u64 memcpy: */ +#define MXCSR_AND_FLAGS_SIZE sizeof(u64) + /* * Software based FPU emulation state. This is arbitrary really, * it matches the x87 format to make it easier to understand: @@ -290,36 +293,13 @@ struct fpu { unsigned int last_cpu; /* - * @fpstate_active: + * @initialized: * - * This flag indicates whether this context is active: if the task + * This flag indicates whether this context is initialized: if the task * is not running then we can restore from this context, if the task * is running then we should save into this context. */ - unsigned char fpstate_active; - - /* - * @fpregs_active: - * - * This flag determines whether a given context is actively - * loaded into the FPU's registers and that those registers - * represent the task's current FPU state. - * - * Note the interaction with fpstate_active: - * - * # task does not use the FPU: - * fpstate_active == 0 - * - * # task uses the FPU and regs are active: - * fpstate_active == 1 && fpregs_active == 1 - * - * # the regs are inactive but still match fpstate: - * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu - * - * The third state is what we use for the lazy restore optimization - * on lazy-switching CPUs. - */ - unsigned char fpregs_active; + unsigned char initialized; /* * @state: diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 1b2799e0699a..83fee2469eb7 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -48,8 +48,12 @@ void fpu__xstate_clear_all_cpu_caps(void); void *get_xsave_addr(struct xregs_state *xsave, int xstate); const void *get_xsave_field_ptr(int xstate_field); int using_compacted_format(void); -int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, - void __user *ubuf, struct xregs_state *xsave); -int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, - struct xregs_state *xsave); +int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); +int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); +int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); +int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); + +/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ +extern int validate_xstate_header(const struct xstate_header *hdr); + #endif diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index fde36f189836..fa2558e12024 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -219,8 +219,8 @@ struct x86_emulate_ops { struct x86_instruction_info *info, enum x86_intercept_stage stage); - void (*get_cpuid)(struct x86_emulate_ctxt *ctxt, - u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); + bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx, bool check_limit); void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 369e41c23f07..c73e493adf07 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -79,15 +79,14 @@ | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) -#define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL #define CR3_PCID_INVD BIT_64(63) #define CR4_RESERVED_BITS \ (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ - | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP \ - | X86_CR4_PKE)) + | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \ + | X86_CR4_SMAP | X86_CR4_PKE)) #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) @@ -204,7 +203,6 @@ enum { #define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT) #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ - PFERR_USER_MASK | \ PFERR_WRITE_MASK | \ PFERR_PRESENT_MASK) @@ -317,15 +315,17 @@ struct kvm_pio_request { int size; }; +#define PT64_ROOT_MAX_LEVEL 5 + struct rsvd_bits_validate { - u64 rsvd_bits_mask[2][4]; + u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL]; u64 bad_mt_xwr; }; /* - * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level - * 32-bit). The kvm_mmu structure abstracts the details of the current mmu - * mode. + * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit, + * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the + * current mmu mode. */ struct kvm_mmu { void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); @@ -548,8 +548,8 @@ struct kvm_vcpu_arch { struct kvm_queued_exception { bool pending; + bool injected; bool has_error_code; - bool reinject; u8 nr; u32 error_code; u8 nested_apf; @@ -687,8 +687,12 @@ struct kvm_vcpu_arch { int pending_ioapic_eoi; int pending_external_vector; - /* GPA available (AMD only) */ + /* GPA available */ bool gpa_available; + gpa_t gpa_val; + + /* be preempted when it's in kernel-mode(cpl=0) */ + bool preempted_in_kernel; }; struct kvm_lpage_info { @@ -947,7 +951,6 @@ struct kvm_x86_ops { void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); - u32 (*get_pkru)(struct kvm_vcpu *vcpu); void (*tlb_flush)(struct kvm_vcpu *vcpu); @@ -969,7 +972,7 @@ struct kvm_x86_ops { void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); - bool (*get_enable_apicv)(void); + bool (*get_enable_apicv)(struct kvm_vcpu *vcpu); void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); @@ -979,7 +982,7 @@ struct kvm_x86_ops { void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); - int (*get_tdp_level)(void); + int (*get_tdp_level)(struct kvm_vcpu *vcpu); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); int (*get_lpage_level)(void); bool (*rdtscp_supported)(void); @@ -1297,20 +1300,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); } -static inline u64 get_canonical(u64 la) -{ - return ((int64_t)la << 16) >> 16; -} - -static inline bool is_noncanonical_address(u64 la) -{ -#ifdef CONFIG_X86_64 - return get_canonical(la) != la; -#else - return false; -#endif -} - #define TSS_IOPB_BASE_OFFSET 0x66 #define TSS_BASE_SIZE 0x68 #define TSS_IOPB_SIZE (65536 / 8) diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index bc62e7cbf1b1..59ad3d132353 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -88,7 +88,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); void __init kvm_guest_init(void); -void kvm_async_pf_task_wait(u32 token); +void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_pf_reason(void); extern void kvm_disable_steal_time(void); @@ -103,7 +103,7 @@ static inline void kvm_spinlock_init(void) #else /* CONFIG_KVM_GUEST */ #define kvm_guest_init() do {} while (0) -#define kvm_async_pf_task_wait(T) do {} while(0) +#define kvm_async_pf_task_wait(T, I) do {} while(0) #define kvm_async_pf_task_wake(T) do {} while(0) static inline bool kvm_para_available(void) diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 8e618fcf1f7c..6a77c63540f7 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -21,7 +21,7 @@ #ifdef CONFIG_AMD_MEM_ENCRYPT -extern unsigned long sme_me_mask; +extern u64 sme_me_mask; void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr, unsigned long decrypted_kernel_vaddr, @@ -49,7 +49,7 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size); #else /* !CONFIG_AMD_MEM_ENCRYPT */ -#define sme_me_mask 0UL +#define sme_me_mask 0ULL static inline void __init sme_early_encrypt(resource_size_t paddr, unsigned long size) { } diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 7ae318c340d9..c120b5db178a 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -286,6 +286,32 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, return __pkru_allows_pkey(vma_pkey(vma), write); } +/* + * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID + * bits. This serves two purposes. It prevents a nasty situation in + * which PCID-unaware code saves CR3, loads some other value (with PCID + * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if + * the saved ASID was nonzero. It also means that any bugs involving + * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger + * deterministically. + */ + +static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid) +{ + if (static_cpu_has(X86_FEATURE_PCID)) { + VM_WARN_ON_ONCE(asid > 4094); + return __sme_pa(mm->pgd) | (asid + 1); + } else { + VM_WARN_ON_ONCE(asid != 0); + return __sme_pa(mm->pgd); + } +} + +static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid) +{ + VM_WARN_ON_ONCE(asid > 4094); + return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH; +} /* * This can be used from process context to figure out what the value of @@ -296,10 +322,8 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, */ static inline unsigned long __get_current_cr3_fast(void) { - unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd); - - if (static_cpu_has(X86_FEATURE_PCID)) - cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid); + unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm), + this_cpu_read(cpu_tlbstate.loaded_mm_asid)); /* For now, be very restrictive about when this can be called. */ VM_WARN_ON(in_nmi() || preemptible()); diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 831eb7895535..c471ca1f9412 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -86,7 +86,6 @@ static inline void e820__memblock_alloc_reserved_mpc_new(void) { } #endif int generic_processor_info(int apicid, int version); -int __generic_processor_info(int apicid, int version, bool enabled); #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC) diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 58b9291b46d8..738503e1f80c 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -3,6 +3,8 @@ #include <linux/types.h> #include <linux/atomic.h> +#include <linux/nmi.h> +#include <asm/io.h> #include <asm/hyperv.h> /* @@ -170,12 +172,153 @@ void hv_remove_crash_handler(void); #if IS_ENABLED(CONFIG_HYPERV) extern struct clocksource *hyperv_cs; +extern void *hv_hypercall_pg; + +static inline u64 hv_do_hypercall(u64 control, void *input, void *output) +{ + u64 input_address = input ? virt_to_phys(input) : 0; + u64 output_address = output ? virt_to_phys(output) : 0; + u64 hv_status; + +#ifdef CONFIG_X86_64 + if (!hv_hypercall_pg) + return U64_MAX; + + __asm__ __volatile__("mov %4, %%r8\n" + "call *%5" + : "=a" (hv_status), ASM_CALL_CONSTRAINT, + "+c" (control), "+d" (input_address) + : "r" (output_address), "m" (hv_hypercall_pg) + : "cc", "memory", "r8", "r9", "r10", "r11"); +#else + u32 input_address_hi = upper_32_bits(input_address); + u32 input_address_lo = lower_32_bits(input_address); + u32 output_address_hi = upper_32_bits(output_address); + u32 output_address_lo = lower_32_bits(output_address); + + if (!hv_hypercall_pg) + return U64_MAX; + + __asm__ __volatile__("call *%7" + : "=A" (hv_status), + "+c" (input_address_lo), ASM_CALL_CONSTRAINT + : "A" (control), + "b" (input_address_hi), + "D"(output_address_hi), "S"(output_address_lo), + "m" (hv_hypercall_pg) + : "cc", "memory"); +#endif /* !x86_64 */ + return hv_status; +} + +#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0) +#define HV_HYPERCALL_FAST_BIT BIT(16) +#define HV_HYPERCALL_VARHEAD_OFFSET 17 +#define HV_HYPERCALL_REP_COMP_OFFSET 32 +#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32) +#define HV_HYPERCALL_REP_START_OFFSET 48 +#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48) + +/* Fast hypercall with 8 bytes of input and no output */ +static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) +{ + u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT; + +#ifdef CONFIG_X86_64 + { + __asm__ __volatile__("call *%4" + : "=a" (hv_status), ASM_CALL_CONSTRAINT, + "+c" (control), "+d" (input1) + : "m" (hv_hypercall_pg) + : "cc", "r8", "r9", "r10", "r11"); + } +#else + { + u32 input1_hi = upper_32_bits(input1); + u32 input1_lo = lower_32_bits(input1); + + __asm__ __volatile__ ("call *%5" + : "=A"(hv_status), + "+c"(input1_lo), + ASM_CALL_CONSTRAINT + : "A" (control), + "b" (input1_hi), + "m" (hv_hypercall_pg) + : "cc", "edi", "esi"); + } +#endif + return hv_status; +} + +/* + * Rep hypercalls. Callers of this functions are supposed to ensure that + * rep_count and varhead_size comply with Hyper-V hypercall definition. + */ +static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, + void *input, void *output) +{ + u64 control = code; + u64 status; + u16 rep_comp; + + control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET; + control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET; + + do { + status = hv_do_hypercall(control, input, output); + if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) + return status; + + /* Bits 32-43 of status have 'Reps completed' data. */ + rep_comp = (status & HV_HYPERCALL_REP_COMP_MASK) >> + HV_HYPERCALL_REP_COMP_OFFSET; + + control &= ~HV_HYPERCALL_REP_START_MASK; + control |= (u64)rep_comp << HV_HYPERCALL_REP_START_OFFSET; + + touch_nmi_watchdog(); + } while (rep_comp < rep_count); + + return status; +} + +/* + * Hypervisor's notion of virtual processor ID is different from + * Linux' notion of CPU ID. This information can only be retrieved + * in the context of the calling CPU. Setup a map for easy access + * to this information. + */ +extern u32 *hv_vp_index; + +/** + * hv_cpu_number_to_vp_number() - Map CPU to VP. + * @cpu_number: CPU number in Linux terms + * + * This function returns the mapping between the Linux processor + * number and the hypervisor's virtual processor number, useful + * in making hypercalls and such that talk about specific + * processors. + * + * Return: Virtual processor number in Hyper-V terms + */ +static inline int hv_cpu_number_to_vp_number(int cpu_number) +{ + return hv_vp_index[cpu_number]; +} void hyperv_init(void); +void hyperv_setup_mmu_ops(void); +void hyper_alloc_mmu(void); void hyperv_report_panic(struct pt_regs *regs); bool hv_is_hypercall_page_setup(void); void hyperv_cleanup(void); -#endif +#else /* CONFIG_HYPERV */ +static inline void hyperv_init(void) {} +static inline bool hv_is_hypercall_page_setup(void) { return false; } +static inline void hyperv_cleanup(void) {} +static inline void hyperv_setup_mmu_ops(void) {} +#endif /* CONFIG_HYPERV */ + #ifdef CONFIG_HYPERV_TSCPAGE struct ms_hyperv_tsc_page *hv_get_tsc_page(void); static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index c25dd22f7c70..12deec722cf0 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -71,11 +71,6 @@ static inline void write_cr3(unsigned long x) PVOP_VCALL1(pv_mmu_ops.write_cr3, x); } -static inline unsigned long __read_cr4(void) -{ - return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4); -} - static inline void __write_cr4(unsigned long x) { PVOP_VCALL1(pv_cpu_ops.write_cr4, x); @@ -228,10 +223,6 @@ static inline void set_ldt(const void *addr, unsigned entries) { PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries); } -static inline void store_idt(struct desc_ptr *dtr) -{ - PVOP_VCALL1(pv_cpu_ops.store_idt, dtr); -} static inline unsigned long paravirt_store_tr(void) { return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr); @@ -365,12 +356,6 @@ static inline void paravirt_release_p4d(unsigned long pfn) PVOP_VCALL1(pv_mmu_ops.release_p4d, pfn); } -static inline void pte_update(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep); -} - static inline pte_t __pte(pteval_t val) { pteval_t ret; @@ -472,28 +457,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte); } -static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, pmd_t pmd) -{ - if (sizeof(pmdval_t) > sizeof(long)) - /* 5 arg words */ - pv_mmu_ops.set_pmd_at(mm, addr, pmdp, pmd); - else - PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp, - native_pmd_val(pmd)); -} - -static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, - pud_t *pudp, pud_t pud) -{ - if (sizeof(pudval_t) > sizeof(long)) - /* 5 arg words */ - pv_mmu_ops.set_pud_at(mm, addr, pudp, pud); - else - PVOP_VCALL4(pv_mmu_ops.set_pud_at, mm, addr, pudp, - native_pud_val(pud)); -} - static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { pmdval_t val = native_pmd_val(pmd); diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 6b64fc6367f2..280d94c36dad 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -107,7 +107,6 @@ struct pv_cpu_ops { unsigned long (*read_cr0)(void); void (*write_cr0)(unsigned long); - unsigned long (*read_cr4)(void); void (*write_cr4)(unsigned long); #ifdef CONFIG_X86_64 @@ -119,8 +118,6 @@ struct pv_cpu_ops { void (*load_tr_desc)(void); void (*load_gdt)(const struct desc_ptr *); void (*load_idt)(const struct desc_ptr *); - /* store_gdt has been removed. */ - void (*store_idt)(struct desc_ptr *); void (*set_ldt)(const void *desc, unsigned entries); unsigned long (*store_tr)(void); void (*load_tls)(struct thread_struct *t, unsigned int cpu); @@ -245,12 +242,6 @@ struct pv_mmu_ops { void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval); void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); - void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, pmd_t pmdval); - void (*set_pud_at)(struct mm_struct *mm, unsigned long addr, - pud_t *pudp, pud_t pudval); - void (*pte_update)(struct mm_struct *mm, unsigned long addr, - pte_t *ptep); pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); @@ -468,8 +459,8 @@ int paravirt_disable_iospace(void); */ #ifdef CONFIG_X86_32 #define PVOP_VCALL_ARGS \ - unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx; \ - register void *__sp asm("esp") + unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx; + #define PVOP_CALL_ARGS PVOP_VCALL_ARGS #define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) @@ -489,8 +480,8 @@ int paravirt_disable_iospace(void); /* [re]ax isn't an arg, but the return val */ #define PVOP_VCALL_ARGS \ unsigned long __edi = __edi, __esi = __esi, \ - __edx = __edx, __ecx = __ecx, __eax = __eax; \ - register void *__sp asm("rsp") + __edx = __edx, __ecx = __ecx, __eax = __eax; + #define PVOP_CALL_ARGS PVOP_VCALL_ARGS #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) @@ -541,7 +532,7 @@ int paravirt_disable_iospace(void); asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : call_clbr, "+r" (__sp) \ + : call_clbr, ASM_CALL_CONSTRAINT \ : paravirt_type(op), \ paravirt_clobber(clbr), \ ##__VA_ARGS__ \ @@ -551,7 +542,7 @@ int paravirt_disable_iospace(void); asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : call_clbr, "+r" (__sp) \ + : call_clbr, ASM_CALL_CONSTRAINT \ : paravirt_type(op), \ paravirt_clobber(clbr), \ ##__VA_ARGS__ \ @@ -578,7 +569,7 @@ int paravirt_disable_iospace(void); asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : call_clbr, "+r" (__sp) \ + : call_clbr, ASM_CALL_CONSTRAINT \ : paravirt_type(op), \ paravirt_clobber(clbr), \ ##__VA_ARGS__ \ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index bbeae4a2bd01..b714934512b3 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -55,8 +55,6 @@ extern pmdval_t early_pmd_flags; #else /* !CONFIG_PARAVIRT */ #define set_pte(ptep, pte) native_set_pte(ptep, pte) #define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) -#define set_pmd_at(mm, addr, pmdp, pmd) native_set_pmd_at(mm, addr, pmdp, pmd) -#define set_pud_at(mm, addr, pudp, pud) native_set_pud_at(mm, addr, pudp, pud) #define set_pte_atomic(ptep, pte) \ native_set_pte_atomic(ptep, pte) @@ -87,8 +85,6 @@ extern pmdval_t early_pmd_flags; #define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) #define pmd_clear(pmd) native_pmd_clear(pmd) -#define pte_update(mm, addr, ptep) do { } while (0) - #define pgd_val(x) native_pgd_val(x) #define __pgd(x) native_make_pgd(x) @@ -979,31 +975,18 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, native_set_pte(ptep, pte); } -static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp , pmd_t pmd) +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) { native_set_pmd(pmdp, pmd); } -static inline void native_set_pud_at(struct mm_struct *mm, unsigned long addr, - pud_t *pudp, pud_t pud) +static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t pud) { native_set_pud(pudp, pud); } -#ifndef CONFIG_PARAVIRT -/* - * Rules for using pte_update - it must be called after any PTE update which - * has not been done using the set_pte / clear_pte interfaces. It is used by - * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE - * updates should either be sets, clears, or set_pte_atomic for P->P - * transitions, which means this hook should only be called for user PTEs. - * This hook implies a P->P protection or access change has taken place, which - * requires a subsequent TLB flush. - */ -#define pte_update(mm, addr, ptep) do { } while (0) -#endif - /* * We only update the dirty/accessed state if we set * the dirty bit by hand in the kernel, since the hardware @@ -1031,7 +1014,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t pte = native_ptep_get_and_clear(ptep); - pte_update(mm, addr, ptep); return pte; } @@ -1058,7 +1040,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte); - pte_update(mm, addr, ptep); } #define flush_tlb_fix_spurious_fault(vma, address) do { } while (0) @@ -1172,6 +1153,23 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) { return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY); } + +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_SWP_SOFT_DIRTY); +} + +static inline int pmd_swp_soft_dirty(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_SWP_SOFT_DIRTY; +} + +static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_SWP_SOFT_DIRTY); +} +#endif #endif #define PKRU_AD_BIT 0x1 diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 2160c1fee920..972a4698c530 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -180,15 +180,21 @@ static inline int pgd_large(pgd_t pgd) { return 0; } /* * Encode and de-code a swap entry * - * | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number - * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names - * | OFFSET (14->63) | TYPE (9-13) |0|X|X|X| X| X|X|X|0| <- swp entry + * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number + * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names + * | OFFSET (14->63) | TYPE (9-13) |0|0|X|X| X| X|X|SD|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above * there. We also need to avoid using A and D because of an * erratum where they can be incorrectly set by hardware on * non-present PTEs. + * + * SD (1) in swp entry is used to store soft dirty bit, which helps us + * remember soft dirty over page migration + * + * Bit 7 in swp entry should be 0 because pmd_present checks not only P, + * but also L and G. */ #define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) #define SWP_TYPE_BITS 5 @@ -204,7 +210,9 @@ static inline int pgd_large(pgd_t pgd) { return 0; } ((type) << (SWP_TYPE_FIRST_BIT)) \ | ((offset) << SWP_OFFSET_FIRST_BIT) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) +#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val((pmd)) }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) +#define __swp_entry_to_pmd(x) ((pmd_t) { .pmd = (x).val }) extern int kern_addr_valid(unsigned long addr); extern void cleanup_highmap(void); diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 399261ce904c..f1492473f10e 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -99,15 +99,15 @@ /* * Tracking soft dirty bit when a page goes to a swap is tricky. * We need a bit which can be stored in pte _and_ not conflict - * with swap entry format. On x86 bits 6 and 7 are *not* involved - * into swap entry computation, but bit 6 is used for nonlinear - * file mapping, so we borrow bit 7 for soft dirty tracking. + * with swap entry format. On x86 bits 1-4 are *not* involved + * into swap entry computation, but bit 7 is used for thp migration, + * so we borrow bit 1 for soft dirty tracking. * * Please note that this bit must be treated as swap dirty page - * mark if and only if the PTE has present bit clear! + * mark if and only if the PTE/PMD has present bit clear! */ #ifdef CONFIG_MEM_SOFT_DIRTY -#define _PAGE_SWP_SOFT_DIRTY _PAGE_PSE +#define _PAGE_SWP_SOFT_DIRTY _PAGE_RW #else #define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0)) #endif diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index ec1f3c651150..4f44505dbf87 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -100,19 +100,14 @@ static __always_inline bool should_resched(int preempt_offset) #ifdef CONFIG_PREEMPT extern asmlinkage void ___preempt_schedule(void); -# define __preempt_schedule() \ -({ \ - register void *__sp asm(_ASM_SP); \ - asm volatile ("call ___preempt_schedule" : "+r"(__sp)); \ -}) +# define __preempt_schedule() \ + asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT) extern asmlinkage void preempt_schedule(void); extern asmlinkage void ___preempt_schedule_notrace(void); -# define __preempt_schedule_notrace() \ -({ \ - register void *__sp asm(_ASM_SP); \ - asm volatile ("call ___preempt_schedule_notrace" : "+r"(__sp)); \ -}) +# define __preempt_schedule_notrace() \ + asm volatile ("call ___preempt_schedule_notrace" : ASM_CALL_CONSTRAINT) + extern asmlinkage void preempt_schedule_notrace(void); #endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 3fa26a61eabc..b390ff76e58f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -677,8 +677,6 @@ static inline void sync_core(void) * Like all of Linux's memory ordering operations, this is a * compiler barrier as well. */ - register void *__sp asm(_ASM_SP); - #ifdef CONFIG_X86_32 asm volatile ( "pushfl\n\t" @@ -686,7 +684,7 @@ static inline void sync_core(void) "pushl $1f\n\t" "iret\n\t" "1:" - : "+r" (__sp) : : "memory"); + : ASM_CALL_CONSTRAINT : : "memory"); #else unsigned int tmp; @@ -703,7 +701,7 @@ static inline void sync_core(void) "iretq\n\t" UNWIND_HINT_RESTORE "1:" - : "=&r" (tmp), "+r" (__sp) : : "cc", "memory"); + : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory"); #endif } diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index a34e0d4b957d..7116b7931c7b 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -103,7 +103,6 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) ({ \ long tmp; \ struct rw_semaphore* ret; \ - register void *__sp asm(_ASM_SP); \ \ asm volatile("# beginning down_write\n\t" \ LOCK_PREFIX " xadd %1,(%4)\n\t" \ @@ -114,7 +113,8 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) " call " slow_path "\n" \ "1:\n" \ "# ending down_write" \ - : "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \ + : "+m" (sem->count), "=d" (tmp), \ + "=a" (ret), ASM_CALL_CONSTRAINT \ : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \ : "memory", "cc"); \ ret; \ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index e4585a393965..a65cf544686a 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -39,6 +39,7 @@ static inline void vsmp_init(void) { } #endif void setup_bios_corruption_check(void); +void early_platform_quirks(void); extern unsigned long saved_video_mode; diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 9efaabf5b54b..a24dfcf79f4a 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -135,6 +135,11 @@ static inline void native_wbinvd(void) extern asmlinkage void native_load_gs_index(unsigned); +static inline unsigned long __read_cr4(void) +{ + return native_read_cr4(); +} + #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #else @@ -173,11 +178,6 @@ static inline void write_cr3(unsigned long x) native_write_cr3(x); } -static inline unsigned long __read_cr4(void) -{ - return native_read_cr4(); -} - static inline void __write_cr4(unsigned long x) { native_write_cr4(x); diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index e9ee84873de5..e371e7229042 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h @@ -340,6 +340,30 @@ extern void *memset(void *, int, size_t); #endif #endif /* !CONFIG_FORTIFY_SOURCE */ +#define __HAVE_ARCH_MEMSET16 +static inline void *memset16(uint16_t *s, uint16_t v, size_t n) +{ + int d0, d1; + asm volatile("rep\n\t" + "stosw" + : "=&c" (d0), "=&D" (d1) + : "a" (v), "1" (s), "0" (n) + : "memory"); + return s; +} + +#define __HAVE_ARCH_MEMSET32 +static inline void *memset32(uint32_t *s, uint32_t v, size_t n) +{ + int d0, d1; + asm volatile("rep\n\t" + "stosl" + : "=&c" (d0), "=&D" (d1) + : "a" (v), "1" (s), "0" (n) + : "memory"); + return s; +} + /* * find the first occurrence of byte 'c', or 1 past the area if none */ diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 2a8c822de1fc..f372a70a523f 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -58,6 +58,42 @@ extern void *__memcpy(void *to, const void *from, size_t len); void *memset(void *s, int c, size_t n); void *__memset(void *s, int c, size_t n); +#define __HAVE_ARCH_MEMSET16 +static inline void *memset16(uint16_t *s, uint16_t v, size_t n) +{ + long d0, d1; + asm volatile("rep\n\t" + "stosw" + : "=&c" (d0), "=&D" (d1) + : "a" (v), "1" (s), "0" (n) + : "memory"); + return s; +} + +#define __HAVE_ARCH_MEMSET32 +static inline void *memset32(uint32_t *s, uint32_t v, size_t n) +{ + long d0, d1; + asm volatile("rep\n\t" + "stosl" + : "=&c" (d0), "=&D" (d1) + : "a" (v), "1" (s), "0" (n) + : "memory"); + return s; +} + +#define __HAVE_ARCH_MEMSET64 +static inline void *memset64(uint64_t *s, uint64_t v, size_t n) +{ + long d0, d1; + asm volatile("rep\n\t" + "stosq" + : "=&c" (d0), "=&D" (d1) + : "a" (v), "1" (s), "0" (n) + : "memory"); + return s; +} + #define __HAVE_ARCH_MEMMOVE void *memmove(void *dest, const void *src, size_t count); void *__memmove(void *dest, const void *src, size_t count); diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 58fffe79e417..14835dd205a5 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -107,6 +107,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_IRQ_SHIFT 8 #define V_IRQ_MASK (1 << V_IRQ_SHIFT) +#define V_GIF_SHIFT 9 +#define V_GIF_MASK (1 << V_GIF_SHIFT) + #define V_INTR_PRIO_SHIFT 16 #define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) @@ -116,6 +119,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_INTR_MASKING_SHIFT 24 #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) +#define V_GIF_ENABLE_SHIFT 25 +#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT) + #define AVIC_ENABLE_SHIFT 31 #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 5161da1a0fa0..89e7eeb5cec1 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -158,17 +158,6 @@ struct thread_info { */ #ifndef __ASSEMBLY__ -static inline unsigned long current_stack_pointer(void) -{ - unsigned long sp; -#ifdef CONFIG_X86_64 - asm("mov %%rsp,%0" : "=g" (sp)); -#else - asm("mov %%esp,%0" : "=g" (sp)); -#endif - return sp; -} - /* * Walks up the stack frames to make sure that the specified object is * entirely contained by a single stack frame. diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index d23e61dc0640..4893abf7f74f 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -198,6 +198,8 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) cr4_set_bits(mask); } +extern void initialize_tlbstate_and_flush(void); + static inline void __native_flush_tlb(void) { /* diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h index 342e59789fcd..39f7a27bef13 100644 --- a/arch/x86/include/asm/trace/fpu.h +++ b/arch/x86/include/asm/trace/fpu.h @@ -12,25 +12,22 @@ DECLARE_EVENT_CLASS(x86_fpu, TP_STRUCT__entry( __field(struct fpu *, fpu) - __field(bool, fpregs_active) - __field(bool, fpstate_active) + __field(bool, initialized) __field(u64, xfeatures) __field(u64, xcomp_bv) ), TP_fast_assign( __entry->fpu = fpu; - __entry->fpregs_active = fpu->fpregs_active; - __entry->fpstate_active = fpu->fpstate_active; + __entry->initialized = fpu->initialized; if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { __entry->xfeatures = fpu->state.xsave.header.xfeatures; __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; } ), - TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx", + TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx", __entry->fpu, - __entry->fpregs_active, - __entry->fpstate_active, + __entry->initialized, __entry->xfeatures, __entry->xcomp_bv ) diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h new file mode 100644 index 000000000000..4253bca99989 --- /dev/null +++ b/arch/x86/include/asm/trace/hyperv.h @@ -0,0 +1,40 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hyperv + +#if !defined(_TRACE_HYPERV_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_HYPERV_H + +#include <linux/tracepoint.h> + +#if IS_ENABLED(CONFIG_HYPERV) + +TRACE_EVENT(hyperv_mmu_flush_tlb_others, + TP_PROTO(const struct cpumask *cpus, + const struct flush_tlb_info *info), + TP_ARGS(cpus, info), + TP_STRUCT__entry( + __field(unsigned int, ncpus) + __field(struct mm_struct *, mm) + __field(unsigned long, addr) + __field(unsigned long, end) + ), + TP_fast_assign(__entry->ncpus = cpumask_weight(cpus); + __entry->mm = info->mm; + __entry->addr = info->start; + __entry->end = info->end; + ), + TP_printk("ncpus %d mm %p addr %lx, end %lx", + __entry->ncpus, __entry->mm, + __entry->addr, __entry->end) + ); + +#endif /* CONFIG_HYPERV */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH asm/trace/ +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE hyperv +#endif /* _TRACE_HYPERV_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 184eb9894dae..4b892917edeb 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -166,11 +166,11 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) ({ \ int __ret_gu; \ register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \ - register void *__sp asm(_ASM_SP); \ __chk_user_ptr(ptr); \ might_fault(); \ asm volatile("call __get_user_%P4" \ - : "=a" (__ret_gu), "=r" (__val_gu), "+r" (__sp) \ + : "=a" (__ret_gu), "=r" (__val_gu), \ + ASM_CALL_CONSTRAINT \ : "0" (ptr), "i" (sizeof(*(ptr)))); \ (x) = (__force __typeof__(*(ptr))) __val_gu; \ __builtin_expect(__ret_gu, 0); \ @@ -337,7 +337,7 @@ do { \ _ASM_EXTABLE(1b, 4b) \ _ASM_EXTABLE(2b, 4b) \ : "=r" (retval), "=&A"(x) \ - : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1), \ + : "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1), \ "i" (errret), "0" (retval)); \ }) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 35cd06f636ab..caec8417539f 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -72,6 +72,7 @@ #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define SECONDARY_EXEC_RDRAND 0x00000800 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 +#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 #define SECONDARY_EXEC_RDSEED 0x00010000 #define SECONDARY_EXEC_ENABLE_PML 0x00020000 @@ -114,6 +115,10 @@ #define VMX_MISC_SAVE_EFER_LMA 0x00000020 #define VMX_MISC_ACTIVITY_HLT 0x00000040 +/* VMFUNC functions */ +#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001 +#define VMFUNC_EPTP_ENTRIES 512 + static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) { return vmx_basic & GENMASK_ULL(30, 0); @@ -187,6 +192,8 @@ enum vmcs_field { APIC_ACCESS_ADDR_HIGH = 0x00002015, POSTED_INTR_DESC_ADDR = 0x00002016, POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, + VM_FUNCTION_CONTROL = 0x00002018, + VM_FUNCTION_CONTROL_HIGH = 0x00002019, EPT_POINTER = 0x0000201a, EPT_POINTER_HIGH = 0x0000201b, EOI_EXIT_BITMAP0 = 0x0000201c, @@ -197,6 +204,8 @@ enum vmcs_field { EOI_EXIT_BITMAP2_HIGH = 0x00002021, EOI_EXIT_BITMAP3 = 0x00002022, EOI_EXIT_BITMAP3_HIGH = 0x00002023, + EPTP_LIST_ADDRESS = 0x00002024, + EPTP_LIST_ADDRESS_HIGH = 0x00002025, VMREAD_BITMAP = 0x00002026, VMWRITE_BITMAP = 0x00002028, XSS_EXIT_BITMAP = 0x0000202C, @@ -444,6 +453,7 @@ enum vmcs_field { #define VMX_EPT_EXECUTE_ONLY_BIT (1ull) #define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6) +#define VMX_EPT_PAGE_WALK_5_BIT (1ull << 7) #define VMX_EPTP_UC_BIT (1ull << 8) #define VMX_EPTP_WB_BIT (1ull << 14) #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) @@ -459,12 +469,14 @@ enum vmcs_field { #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ #define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT (1ull << 11) /* (43 - 32) */ -#define VMX_EPT_DEFAULT_GAW 3 -#define VMX_EPT_MAX_GAW 0x4 #define VMX_EPT_MT_EPTE_SHIFT 3 -#define VMX_EPT_GAW_EPTP_SHIFT 3 -#define VMX_EPT_AD_ENABLE_BIT (1ull << 6) -#define VMX_EPT_DEFAULT_MT 0x6ull +#define VMX_EPTP_PWL_MASK 0x38ull +#define VMX_EPTP_PWL_4 0x18ull +#define VMX_EPTP_PWL_5 0x20ull +#define VMX_EPTP_AD_ENABLE_BIT (1ull << 6) +#define VMX_EPTP_MT_MASK 0x7ull +#define VMX_EPTP_MT_WB 0x6ull +#define VMX_EPTP_MT_UC 0x0ull #define VMX_EPT_READABLE_MASK 0x1ull #define VMX_EPT_WRITABLE_MASK 0x2ull #define VMX_EPT_EXECUTABLE_MASK 0x4ull diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 9606688caa4b..7cb282e9e587 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -113,10 +113,9 @@ extern struct { char _entry[32]; } hypercall_page[]; register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \ register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \ register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \ - register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; \ - register void *__sp asm(_ASM_SP); + register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; -#define __HYPERCALL_0PARAM "=r" (__res), "+r" (__sp) +#define __HYPERCALL_0PARAM "=r" (__res), ASM_CALL_CONSTRAINT #define __HYPERCALL_1PARAM __HYPERCALL_0PARAM, "+r" (__arg1) #define __HYPERCALL_2PARAM __HYPERCALL_1PARAM, "+r" (__arg2) #define __HYPERCALL_3PARAM __HYPERCALL_2PARAM, "+r" (__arg3) @@ -552,13 +551,13 @@ static inline void MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, struct desc_struct desc) { - u32 *p = (u32 *) &desc; - mcl->op = __HYPERVISOR_update_descriptor; if (sizeof(maddr) == sizeof(long)) { mcl->args[0] = maddr; mcl->args[1] = *(unsigned long *)&desc; } else { + u32 *p = (u32 *)&desc; + mcl->args[0] = maddr; mcl->args[1] = maddr >> 32; mcl->args[2] = *p++; diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 8417ef7c3885..07b6531813c4 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -158,9 +158,6 @@ static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn) unsigned long pfn; int ret; - if (xen_feature(XENFEAT_auto_translated_physmap)) - return mfn; - if (unlikely(mfn >= machine_to_phys_nr)) return ~0; @@ -317,8 +314,6 @@ static inline pte_t __pte_ma(pteval_t x) #define p4d_val_ma(x) ((x).p4d) #endif -void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid); - xmaddr_t arbitrary_virt_to_machine(void *address); unsigned long arbitrary_virt_to_mfn(void *vaddr); void make_lowmem_page_readonly(void *vaddr); diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 127ddadee1a5..f65d12504e80 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -149,12 +149,9 @@ */ #define HV_X64_DEPRECATING_AEOI_RECOMMENDED (1 << 9) -/* - * HV_VP_SET available - */ +/* Recommend using the newer ExProcessorMasks interface */ #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11) - /* * Crash notification flag. */ @@ -242,7 +239,11 @@ (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) /* Declare the various hypercall operations. */ +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002 +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003 #define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 #define HVCALL_POST_MESSAGE 0x005c #define HVCALL_SIGNAL_EVENT 0x005d @@ -259,6 +260,16 @@ #define HV_PROCESSOR_POWER_STATE_C2 2 #define HV_PROCESSOR_POWER_STATE_C3 3 +#define HV_FLUSH_ALL_PROCESSORS BIT(0) +#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1) +#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2) +#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) + +enum HV_GENERIC_SET_FORMAT { + HV_GENERIC_SET_SPARCE_4K, + HV_GENERIC_SET_ALL, +}; + /* hypercall status code */ #define HV_STATUS_SUCCESS 0 #define HV_STATUS_INVALID_HYPERCALL_CODE 2 diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h index 39bca7fac087..3be08f07695c 100644 --- a/arch/x86/include/uapi/asm/mman.h +++ b/arch/x86/include/uapi/asm/mman.h @@ -3,9 +3,6 @@ #define MAP_32BIT 0x40 /* only give out 32bit addresses */ -#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) -#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) - #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS /* * Take the 4 protection key bits out of the vma->vm_flags |