From 51a9a8284e43642fc3e85810fd54f4c245d23a14 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 18 Sep 2017 10:03:12 +0100 Subject: x86/xen: clean up clang build warning In the case where sizeof(maddr) != sizeof(long), p is initialized but never read, and clang throws a warning on this. Move the declaration of p to clean up the clang build warning: warning: Value stored to 'p' during its initialization is never read Signed-off-by: Colin Ian King Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- arch/x86/include/asm/xen/hypercall.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 9606688caa4b..e089c1675a7c 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -552,13 +552,13 @@ static inline void MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, struct desc_struct desc) { - u32 *p = (u32 *) &desc; - mcl->op = __HYPERVISOR_update_descriptor; if (sizeof(maddr) == sizeof(long)) { mcl->args[0] = maddr; mcl->args[1] = *(unsigned long *)&desc; } else { + u32 *p = (u32 *)&desc; + mcl->args[0] = maddr; mcl->args[1] = maddr >> 32; mcl->args[2] = *p++; -- cgit From 44889942b6eb356eab27ce25fe10701adfec7776 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Fri, 22 Sep 2017 07:53:15 +0200 Subject: KVM: nVMX: fix HOST_CR3/HOST_CR4 cache For nested virt we maintain multiple VMCSs that can run on a vCPU. So it is incorrect to keep vmcs_host_cr3 and vmcs_host_cr4, whose purpose is caching the value of the rarely changing HOST_CR3 and HOST_CR4 VMCS fields, in vCPU-wide data structures. Hyper-V nested on KVM runs into this consistently for me with PCID enabled. CR3 is updated with a new value, unlikely(cr3 != vmx->host_state.vmcs_host_cr3) fires, and the currently loaded VMCS is updated. Then we switch from L2 to L1 and the next exit reverts CR3 to its old value. Fixes: d6e41f1151fe ("x86/mm, KVM: Teach KVM's VMX code that CR3 isn't a constant") Signed-off-by: Ladi Prosek Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0726ca7a1b02..c83d28b0ab05 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -200,6 +200,8 @@ struct loaded_vmcs { int cpu; bool launched; bool nmi_known_unmasked; + unsigned long vmcs_host_cr3; /* May not match real cr3 */ + unsigned long vmcs_host_cr4; /* May not match real cr4 */ struct list_head loaded_vmcss_on_cpu_link; }; @@ -600,8 +602,6 @@ struct vcpu_vmx { int gs_ldt_reload_needed; int fs_reload_needed; u64 msr_host_bndcfgs; - unsigned long vmcs_host_cr3; /* May not match real cr3 */ - unsigned long vmcs_host_cr4; /* May not match real cr4 */ } host_state; struct { int vm86_active; @@ -5178,12 +5178,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) */ cr3 = __read_cr3(); vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ - vmx->host_state.vmcs_host_cr3 = cr3; + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; /* Save the most likely value for this task's CR4 in the VMCS. 
*/ cr4 = cr4_read_shadow(); vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ - vmx->host_state.vmcs_host_cr4 = cr4; + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ #ifdef CONFIG_X86_64 @@ -9274,15 +9274,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); cr3 = __get_current_cr3_fast(); - if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) { + if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) { vmcs_writel(HOST_CR3, cr3); - vmx->host_state.vmcs_host_cr3 = cr3; + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; } cr4 = cr4_read_shadow(); - if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { + if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) { vmcs_writel(HOST_CR4, cr4); - vmx->host_state.vmcs_host_cr4 = cr4; + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; } /* When single-stepping over STI and MOV SS, we must clear the -- cgit From e001fa78d44d0b5c7b1498d1e4a038740efa3b1e Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 15 Sep 2017 15:26:14 +1000 Subject: KVM: PPC: Book3S HV: Check for updated HDSISR on P9 HDSI exception On POWER9 DD2.1 and below, sometimes on a Hypervisor Data Storage Interrupt (HDSI) the HDSISR is not updated at all. To work around this we put a canary value into the HDSISR before returning to a guest and then check for this canary when we take an HDSI. If we find the canary on an HDSI, we know the hardware didn't update the HDSISR. In this case we return to the guest to retake the HDSI, which should correctly update the HDSISR on the second HDSI entry. After talking to Paulus we've applied this workaround to all POWER9 CPUs. The workaround of returning to the guest shouldn't ever be triggered on a well-behaving CPU. The extra instructions should have negligible performance impact. Signed-off-by: Michael Neuling Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 17936f82d3c7..ec69fa45d5a2 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1121,6 +1121,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) BEGIN_FTR_SECTION mtspr SPRN_PPR, r0 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + +/* Move canary into DSISR to check for later */ +BEGIN_FTR_SECTION + li r0, 0x7fff + mtspr SPRN_HDSISR, r0 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + ld r0, VCPU_GPR(R0)(r4) ld r4, VCPU_GPR(R4)(r4) @@ -1956,9 +1963,14 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) kvmppc_hdsi: ld r3, VCPU_KVM(r9) lbz r0, KVM_RADIX(r3) - cmpwi r0, 0 mfspr r4, SPRN_HDAR mfspr r6, SPRN_HDSISR BEGIN_FTR_SECTION + /* Look for DSISR canary. If we find it, retry instruction */ + cmpdi r6, 0x7fff + beq 6f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + cmpwi r0, 0 bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */ /* HPTE not found fault or protection fault? */ andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h -- cgit From af2e658fc08a397b10352265e50b83f27e25d73e Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 22 Sep 2017 12:32:35 +0300 Subject: as3645a: Use ams,input-max-microamp as documented in DT bindings DT bindings document the property "ams,input-max-microamp" that limits the chip's maximum input current. The driver and the DTS, however, used the "peak-current-limit" property. 
Fix this by using the property documented in the DT binding documentation. Signed-off-by: Sakari Ailus Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- arch/arm/boot/dts/omap3-n950-n9.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi index cb47ae79a5f9..b86fc83a5a65 100644 --- a/arch/arm/boot/dts/omap3-n950-n9.dtsi +++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi @@ -273,7 +273,7 @@ flash-timeout-us = <150000>; flash-max-microamp = <320000>; led-max-microamp = <60000>; - peak-current-limit = <1750000>; + ams,input-max-microamp = <1750000>; }; indicator { led-max-microamp = <10000>; -- cgit From e626c325277531db15314b80610d1f5a1c2637b2 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 22 Sep 2017 12:32:37 +0300 Subject: as3645a: Use integer numbers for parsing LEDs Use integer numbers for LEDs: 0 is the flash and 1 is the indicator. Signed-off-by: Sakari Ailus Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- arch/arm/boot/dts/omap3-n950-n9.dtsi | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi index b86fc83a5a65..1b0bd72945f2 100644 --- a/arch/arm/boot/dts/omap3-n950-n9.dtsi +++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi @@ -267,15 +267,19 @@ clock-frequency = <400000>; as3645a@30 { + #address-cells = <1>; + #size-cells = <0>; reg = <0x30>; compatible = "ams,as3645a"; - flash { + flash@0 { + reg = <0x0>; flash-timeout-us = <150000>; flash-max-microamp = <320000>; led-max-microamp = <60000>; ams,input-max-microamp = <1750000>; }; - indicator { + indicator@1 { + reg = <0x1>; led-max-microamp = <10000>; }; }; -- cgit From 656f083116a4799d8c0194976b8a2d66bf306538 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 14:59:44 +0200 Subject: x86/fpu: Rename copyin_to_xsaves()/copyout_from_xsaves() to copy_user_to_xstate()/copy_xstate_to_user() The 'copyin/copyout' nomenclature needlessly departs from what the modern FPU code uses, which is: copy_fpregs_to_fpstate() copy_fpstate_to_sigframe() copy_fregs_to_user() copy_fxregs_to_kernel() copy_fxregs_to_user() copy_kernel_to_fpregs() copy_kernel_to_fregs() copy_kernel_to_fxregs() copy_kernel_to_xregs() copy_user_to_fregs() copy_user_to_fxregs() copy_user_to_xregs() copy_xregs_to_kernel() copy_xregs_to_user() I.e. according to this pattern, the following rename should be done: copyin_to_xsaves() -> copy_user_to_xstate() copyout_from_xsaves() -> copy_xstate_to_user() or, if we want to be pedantic, denote that the user-space format is ptrace: copyin_to_xsaves() -> copy_user_ptrace_to_xstate() copyout_from_xsaves() -> copy_xstate_to_user_ptrace() But I'd suggest the shorter, non-pedantic name. 
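For illustration, a sketch of how a call site reads after the rename (this mirrors the xstateregs_set() hunk in the diff below; the direction of the copy is now encoded in the name, copy_<source>_to_<destination>()):

	/* ptrace write path: copy a user-supplied buffer into the task's xstate */
	if (boot_cpu_has(X86_FEATURE_XSAVES))
		ret = copy_user_to_xstate(kbuf, ubuf, xsave);
	else
		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
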
Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-2-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/xstate.h | 4 ++-- arch/x86/kernel/fpu/regset.c | 4 ++-- arch/x86/kernel/fpu/signal.c | 2 +- arch/x86/kernel/fpu/xstate.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 1b2799e0699a..a1baa17e9748 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -48,8 +48,8 @@ void fpu__xstate_clear_all_cpu_caps(void); void *get_xsave_addr(struct xregs_state *xsave, int xstate); const void *get_xsave_field_ptr(int xstate_field); int using_compacted_format(void); -int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, +int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf, struct xregs_state *xsave); -int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, +int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave); #endif diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index b188b16841e3..165d0545c924 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -92,7 +92,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, fpu__activate_fpstate_read(fpu); if (using_compacted_format()) { - ret = copyout_from_xsaves(pos, count, kbuf, ubuf, xsave); + ret = copy_xstate_to_user(pos, count, kbuf, ubuf, xsave); } else { fpstate_sanitize_xstate(fpu); /* @@ -132,7 +132,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, fpu__activate_fpstate_write(fpu); if (boot_cpu_has(X86_FEATURE_XSAVES)) - ret = copyin_to_xsaves(kbuf, ubuf, xsave); + ret = copy_user_to_xstate(kbuf, ubuf, xsave); else ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 83c23c230b4c..b1fe9a1fc4e0 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -324,7 +324,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) fpu__drop(fpu); if (using_compacted_format()) { - err = copyin_to_xsaves(NULL, buf_fx, + err = copy_user_to_xstate(NULL, buf_fx, &fpu->state.xsave); } else { err = __copy_from_user(&fpu->state.xsave, diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index c24ac1efb12d..e7bb41723eaa 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -951,7 +951,7 @@ static inline int xstate_copyout(unsigned int pos, unsigned int count, * zero. This is called from xstateregs_get() and there we check the CPU * has XSAVES. */ -int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, +int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf, struct xregs_state *xsave) { unsigned int offset, size; @@ -1023,7 +1023,7 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, * there we check the CPU has XSAVES and a whole standard-sized buffer * exists. 
*/ -int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, +int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave) { unsigned int offset, size; -- cgit From f0d4f30a7fd299587840a028655285a87f334904 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 14:59:45 +0200 Subject: x86/fpu: Split copy_xstate_to_user() into copy_xstate_to_kernel() & copy_xstate_to_user() copy_xstate_to_user() is a weird API - in part due to a bad API inherited from the regset APIs. But don't propagate that bad API choice into the FPU code - so as a first step split the API into kernel and user buffer handling routines. (Also split the xstate_copyout() internal helper.) The split API is a dumb duplication that should be obviously correct, the real splitting will be done in the next patch. Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-3-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/xstate.h | 4 +- arch/x86/kernel/fpu/regset.c | 5 +- arch/x86/kernel/fpu/xstate.c | 110 +++++++++++++++++++++++++++++++++++--- 3 files changed, 109 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index a1baa17e9748..92dc8ca14124 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -48,8 +48,8 @@ void fpu__xstate_clear_all_cpu_caps(void); void *get_xsave_addr(struct xregs_state *xsave, int xstate); const void *get_xsave_field_ptr(int xstate_field); int using_compacted_format(void); -int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf, - void __user *ubuf, struct xregs_state *xsave); +int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf, struct xregs_state *xsave); +int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf, struct xregs_state *xsave); int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave); #endif diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 165d0545c924..b6d12d66d04b 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -92,7 +92,10 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, fpu__activate_fpstate_read(fpu); if (using_compacted_format()) { - ret = copy_xstate_to_user(pos, count, kbuf, ubuf, xsave); + if (kbuf) + ret = copy_xstate_to_kernel(pos, count, kbuf, ubuf, xsave); + else + ret = copy_xstate_to_user(pos, count, kbuf, ubuf, xsave); } else { fpstate_sanitize_xstate(fpu); /* diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index e7bb41723eaa..38561539cb99 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -924,10 +924,106 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, * This is similar to user_regset_copyout(), but will not add offset to * the source data pointer or increment pos, count, kbuf, and ubuf. 
*/ -static inline int xstate_copyout(unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf, - const void *data, const int start_pos, - const int end_pos) +static inline int +__copy_xstate_to_kernel(unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf, + const void *data, const int start_pos, + const int end_pos) +{ + if ((count == 0) || (pos < start_pos)) + return 0; + + if (end_pos < 0 || pos < end_pos) { + unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos)); + + if (kbuf) { + memcpy(kbuf + pos, data, copy); + } else { + if (__copy_to_user(ubuf + pos, data, copy)) + return -EFAULT; + } + } + return 0; +} + +/* + * Convert from kernel XSAVES compacted format to standard format and copy + * to a kernel-space ptrace buffer. + * + * It supports partial copy but pos always starts from zero. This is called + * from xstateregs_get() and there we check the CPU has XSAVES. + */ +int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, + void __user *ubuf, struct xregs_state *xsave) +{ + unsigned int offset, size; + int ret, i; + struct xstate_header header; + + /* + * Currently copy_regset_to_user() starts from pos 0: + */ + if (unlikely(pos != 0)) + return -EFAULT; + + /* + * The destination is a ptrace buffer; we put in only user xstates: + */ + memset(&header, 0, sizeof(header)); + header.xfeatures = xsave->header.xfeatures; + header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR; + + /* + * Copy xregs_state->header: + */ + offset = offsetof(struct xregs_state, header); + size = sizeof(header); + + ret = __copy_xstate_to_kernel(offset, size, kbuf, ubuf, &header, 0, count); + + if (ret) + return ret; + + for (i = 0; i < XFEATURE_MAX; i++) { + /* + * Copy only in-use xstates: + */ + if ((header.xfeatures >> i) & 1) { + void *src = __raw_xsave_addr(xsave, 1 << i); + + offset = xstate_offsets[i]; + size = xstate_sizes[i]; + + ret = __copy_xstate_to_kernel(offset, size, kbuf, ubuf, src, 0, count); + + if (ret) + return ret; + + if (offset + size >= count) + break; + } + + } + + /* + * Fill xsave->i387.sw_reserved value for ptrace frame: + */ + offset = offsetof(struct fxregs_state, sw_reserved); + size = sizeof(xstate_fx_sw_bytes); + + ret = __copy_xstate_to_kernel(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count); + + if (ret) + return ret; + + return 0; +} + +static inline int +__copy_xstate_to_user(unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf, + const void *data, const int start_pos, + const int end_pos) { if ((count == 0) || (pos < start_pos)) return 0; @@ -977,7 +1073,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf, offset = offsetof(struct xregs_state, header); size = sizeof(header); - ret = xstate_copyout(offset, size, kbuf, ubuf, &header, 0, count); + ret = __copy_xstate_to_user(offset, size, kbuf, ubuf, &header, 0, count); if (ret) return ret; @@ -992,7 +1088,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf, offset = xstate_offsets[i]; size = xstate_sizes[i]; - ret = xstate_copyout(offset, size, kbuf, ubuf, src, 0, count); + ret = __copy_xstate_to_user(offset, size, kbuf, ubuf, src, 0, count); if (ret) return ret; @@ -1009,7 +1105,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf, offset = offsetof(struct fxregs_state, sw_reserved); size = sizeof(xstate_fx_sw_bytes); - ret = xstate_copyout(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count); + ret = __copy_xstate_to_user(offset, size, kbuf, ubuf, 
xstate_fx_sw_bytes, 0, count); if (ret) return ret; -- cgit From 4d981cf2d96f29cdfa7d4972c8b377fe7baa9c4c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 14:59:46 +0200 Subject: x86/fpu: Remove 'ubuf' parameter from the copy_xstate_to_kernel() APIs The 'ubuf' parameter is unused in the _kernel() side of the API, remove it. This simplifies the code and makes it easier to think about. Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-4-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/xstate.h | 2 +- arch/x86/kernel/fpu/regset.c | 2 +- arch/x86/kernel/fpu/xstate.c | 21 ++++++--------------- 3 files changed, 8 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 92dc8ca14124..c762574a245f 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -48,7 +48,7 @@ void fpu__xstate_clear_all_cpu_caps(void); void *get_xsave_addr(struct xregs_state *xsave, int xstate); const void *get_xsave_field_ptr(int xstate_field); int using_compacted_format(void); -int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf, struct xregs_state *xsave); +int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, struct xregs_state *xsave); int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf, struct xregs_state *xsave); int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave); diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index b6d12d66d04b..34e74adf9d5d 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -93,7 +93,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, if (using_compacted_format()) { if (kbuf) - ret = copy_xstate_to_kernel(pos, count, kbuf, ubuf, xsave); + ret = copy_xstate_to_kernel(pos, count, kbuf, xsave); else ret = copy_xstate_to_user(pos, count, kbuf, ubuf, xsave); } else { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 38561539cb99..71d3bda2b898 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -926,7 +926,7 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, */ static inline int __copy_xstate_to_kernel(unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf, + void *kbuf, const void *data, const int start_pos, const int end_pos) { @@ -936,12 +936,7 @@ __copy_xstate_to_kernel(unsigned int pos, unsigned int count, if (end_pos < 0 || pos < end_pos) { unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos)); - if (kbuf) { - memcpy(kbuf + pos, data, copy); - } else { - if (__copy_to_user(ubuf + pos, data, copy)) - return -EFAULT; - } + memcpy(kbuf + pos, data, copy); } return 0; } @@ -953,8 +948,7 @@ __copy_xstate_to_kernel(unsigned int pos, unsigned int count, * It supports partial copy but pos always starts from zero. This is called * from xstateregs_get() and there we check the CPU has XSAVES. 
*/ -int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, - void __user *ubuf, struct xregs_state *xsave) +int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, struct xregs_state *xsave) { unsigned int offset, size; int ret, i; @@ -979,8 +973,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, offset = offsetof(struct xregs_state, header); size = sizeof(header); - ret = __copy_xstate_to_kernel(offset, size, kbuf, ubuf, &header, 0, count); - + ret = __copy_xstate_to_kernel(offset, size, kbuf, &header, 0, count); if (ret) return ret; @@ -994,8 +987,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, offset = xstate_offsets[i]; size = xstate_sizes[i]; - ret = __copy_xstate_to_kernel(offset, size, kbuf, ubuf, src, 0, count); - + ret = __copy_xstate_to_kernel(offset, size, kbuf, src, 0, count); if (ret) return ret; @@ -1011,8 +1003,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, offset = offsetof(struct fxregs_state, sw_reserved); size = sizeof(xstate_fx_sw_bytes); - ret = __copy_xstate_to_kernel(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count); - + ret = __copy_xstate_to_kernel(offset, size, kbuf, xstate_fx_sw_bytes, 0, count); if (ret) return ret; -- cgit From a69c158fb3e7a91220f55029bf222a4e678d16e9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 14:59:47 +0200 Subject: x86/fpu: Remove 'kbuf' parameter from the copy_xstate_to_user() APIs The 'kbuf' parameter is unused in the _user() side of the API, remove it. This simplifies the code and makes it easier to think about. Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-5-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/xstate.h | 2 +- arch/x86/kernel/fpu/regset.c | 2 +- arch/x86/kernel/fpu/xstate.c | 25 +++++++------------------ 3 files changed, 9 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index c762574a245f..65bd68c30cd0 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -49,7 +49,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate); const void *get_xsave_field_ptr(int xstate_field); int using_compacted_format(void); int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, struct xregs_state *xsave); -int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf, struct xregs_state *xsave); +int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, struct xregs_state *xsave); int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave); #endif diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 34e74adf9d5d..fd6dbdd8fde6 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -95,7 +95,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, if (kbuf) ret = copy_xstate_to_kernel(pos, count, kbuf, xsave); else - ret = copy_xstate_to_user(pos, count, kbuf, ubuf, xsave); + ret = copy_xstate_to_user(pos, count, ubuf, xsave); } else { fpstate_sanitize_xstate(fpu); /* diff --git a/arch/x86/kernel/fpu/xstate.c 
b/arch/x86/kernel/fpu/xstate.c index 71d3bda2b898..2d8f3344875d 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1011,10 +1011,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, stru } static inline int -__copy_xstate_to_user(unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf, - const void *data, const int start_pos, - const int end_pos) +__copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, const void *data, const int start_pos, const int end_pos) { if ((count == 0) || (pos < start_pos)) return 0; @@ -1022,12 +1019,8 @@ __copy_xstate_to_user(unsigned int pos, unsigned int count, if (end_pos < 0 || pos < end_pos) { unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos)); - if (kbuf) { - memcpy(kbuf + pos, data, copy); - } else { - if (__copy_to_user(ubuf + pos, data, copy)) - return -EFAULT; - } + if (__copy_to_user(ubuf + pos, data, copy)) + return -EFAULT; } return 0; } @@ -1038,8 +1031,7 @@ __copy_xstate_to_user(unsigned int pos, unsigned int count, * zero. This is called from xstateregs_get() and there we check the CPU * has XSAVES. */ -int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf, - void __user *ubuf, struct xregs_state *xsave) +int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, struct xregs_state *xsave) { unsigned int offset, size; int ret, i; @@ -1064,8 +1056,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf, offset = offsetof(struct xregs_state, header); size = sizeof(header); - ret = __copy_xstate_to_user(offset, size, kbuf, ubuf, &header, 0, count); - + ret = __copy_xstate_to_user(offset, size, ubuf, &header, 0, count); if (ret) return ret; @@ -1079,8 +1070,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf, offset = xstate_offsets[i]; size = xstate_sizes[i]; - ret = __copy_xstate_to_user(offset, size, kbuf, ubuf, src, 0, count); - + ret = __copy_xstate_to_user(offset, size, ubuf, src, 0, count); if (ret) return ret; @@ -1096,8 +1086,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf, offset = offsetof(struct fxregs_state, sw_reserved); size = sizeof(xstate_fx_sw_bytes); - ret = __copy_xstate_to_user(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count); - + ret = __copy_xstate_to_user(offset, size, ubuf, xstate_fx_sw_bytes, 0, count); if (ret) return ret; -- cgit From d7eda6c99cc75f1c41d67abf988f37a10045a370 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 14:59:48 +0200 Subject: x86/fpu: Clean up parameter order in the copy_xstate_to_*() APIs Parameter ordering is weird: int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, struct xregs_state *xsave); int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, struct xregs_state *xsave); 'pos' and 'count', which are attributes of the destination buffer, are listed before the destination buffer itself ... List them after the primary arguments instead. This makes the code more similar to regular memcpy() variant APIs. No change in functionality. 
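As a before/after sketch (taken from the regset.c hunk below), the call sites now read like the memcpy() family, with the buffers first and the range attributes trailing:

	/* old: range attributes listed before the buffers */
	ret = copy_xstate_to_kernel(pos, count, kbuf, xsave);

	/* new: destination and source first, like memcpy(dst, src, n) */
	ret = copy_xstate_to_kernel(kbuf, xsave, pos, count);
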
Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-6-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/xstate.h | 4 ++-- arch/x86/kernel/fpu/regset.c | 4 ++-- arch/x86/kernel/fpu/xstate.c | 25 ++++++++++++------------- 3 files changed, 16 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 65bd68c30cd0..e4430b84939d 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -48,8 +48,8 @@ void fpu__xstate_clear_all_cpu_caps(void); void *get_xsave_addr(struct xregs_state *xsave, int xstate); const void *get_xsave_field_ptr(int xstate_field); int using_compacted_format(void); -int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, struct xregs_state *xsave); -int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, struct xregs_state *xsave); +int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int pos, unsigned int count); +int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int pos, unsigned int count); int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave); #endif diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index fd6dbdd8fde6..ec1404194b65 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -93,9 +93,9 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, if (using_compacted_format()) { if (kbuf) - ret = copy_xstate_to_kernel(pos, count, kbuf, xsave); + ret = copy_xstate_to_kernel(kbuf, xsave, pos, count); else - ret = copy_xstate_to_user(pos, count, ubuf, xsave); + ret = copy_xstate_to_user(ubuf, xsave, pos, count); } else { fpstate_sanitize_xstate(fpu); /* diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 2d8f3344875d..0a299468510f 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -925,10 +925,9 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, * the source data pointer or increment pos, count, kbuf, and ubuf. */ static inline int -__copy_xstate_to_kernel(unsigned int pos, unsigned int count, - void *kbuf, - const void *data, const int start_pos, - const int end_pos) +__copy_xstate_to_kernel(void *kbuf, + const void *data, + unsigned int pos, unsigned int count, const int start_pos, const int end_pos) { if ((count == 0) || (pos < start_pos)) return 0; @@ -948,7 +947,7 @@ __copy_xstate_to_kernel(unsigned int pos, unsigned int count, * It supports partial copy but pos always starts from zero. This is called * from xstateregs_get() and there we check the CPU has XSAVES. 
*/ -int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, struct xregs_state *xsave) +int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int pos, unsigned int count) { unsigned int offset, size; int ret, i; @@ -973,7 +972,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, stru offset = offsetof(struct xregs_state, header); size = sizeof(header); - ret = __copy_xstate_to_kernel(offset, size, kbuf, &header, 0, count); + ret = __copy_xstate_to_kernel(kbuf, &header, offset, size, 0, count); if (ret) return ret; @@ -987,7 +986,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, stru offset = xstate_offsets[i]; size = xstate_sizes[i]; - ret = __copy_xstate_to_kernel(offset, size, kbuf, src, 0, count); + ret = __copy_xstate_to_kernel(kbuf, src, offset, size, 0, count); if (ret) return ret; @@ -1003,7 +1002,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, stru offset = offsetof(struct fxregs_state, sw_reserved); size = sizeof(xstate_fx_sw_bytes); - ret = __copy_xstate_to_kernel(offset, size, kbuf, xstate_fx_sw_bytes, 0, count); + ret = __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, 0, count); if (ret) return ret; @@ -1011,7 +1010,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, stru } static inline int -__copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, const void *data, const int start_pos, const int end_pos) +__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, unsigned int count, const int start_pos, const int end_pos) { if ((count == 0) || (pos < start_pos)) return 0; @@ -1031,7 +1030,7 @@ __copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, c * zero. This is called from xstateregs_get() and there we check the CPU * has XSAVES. */ -int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, struct xregs_state *xsave) +int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int pos, unsigned int count) { unsigned int offset, size; int ret, i; @@ -1056,7 +1055,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, offset = offsetof(struct xregs_state, header); size = sizeof(header); - ret = __copy_xstate_to_user(offset, size, ubuf, &header, 0, count); + ret = __copy_xstate_to_user(ubuf, &header, offset, size, 0, count); if (ret) return ret; @@ -1070,7 +1069,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, offset = xstate_offsets[i]; size = xstate_sizes[i]; - ret = __copy_xstate_to_user(offset, size, ubuf, src, 0, count); + ret = __copy_xstate_to_user(ubuf, src, offset, size, 0, count); if (ret) return ret; @@ -1086,7 +1085,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, offset = offsetof(struct fxregs_state, sw_reserved); size = sizeof(xstate_fx_sw_bytes); - ret = __copy_xstate_to_user(offset, size, ubuf, xstate_fx_sw_bytes, 0, count); + ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, 0, count); if (ret) return ret; -- cgit From becb2bb72ff906cc0d2bac3ee9574f694364823b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 14:59:49 +0200 Subject: x86/fpu: Clean up the parameter definitions of copy_xstate_to_*() Remove pointless 'const' of non-pointer input parameter. Remove unnecessary parenthesis that shows uncertainty about arithmetic operator precedence. 
Clarify copy_xstate_to_user() description. No change in functionality. Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-7-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 0a299468510f..9647e7256179 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -927,13 +927,13 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, static inline int __copy_xstate_to_kernel(void *kbuf, const void *data, - unsigned int pos, unsigned int count, const int start_pos, const int end_pos) + unsigned int pos, unsigned int count, int start_pos, int end_pos) { if ((count == 0) || (pos < start_pos)) return 0; if (end_pos < 0 || pos < end_pos) { - unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos)); + unsigned int copy = end_pos < 0 ? count : min(count, end_pos - pos); memcpy(kbuf + pos, data, copy); } @@ -1010,13 +1010,13 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po } static inline int -__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, unsigned int count, const int start_pos, const int end_pos) +__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, unsigned int count, int start_pos, int end_pos) { if ((count == 0) || (pos < start_pos)) return 0; if (end_pos < 0 || pos < end_pos) { - unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos)); + unsigned int copy = end_pos < 0 ? count : min(count, end_pos - pos); if (__copy_to_user(ubuf + pos, data, copy)) return -EFAULT; @@ -1026,7 +1026,7 @@ __copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, uns /* * Convert from kernel XSAVES compacted format to standard format and copy - * to a ptrace buffer. It supports partial copy but pos always starts from + * to a user-space buffer. It supports partial copy but pos always starts from * zero. This is called from xstateregs_get() and there we check the CPU * has XSAVES. */ -- cgit From 8a5b731889cbf004b406d988dc591c8a7aac773e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 14:59:50 +0200 Subject: x86/fpu: Remove the 'start_pos' parameter from the __copy_xstate_to_*() functions 'start_pos' is always 0, so remove it and remove the pointless check of 'pos < 0' which can not ever be true as 'pos' is unsigned ... No change in functionality. 
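A minimal before/after sketch of the guard (mirroring the hunks below) shows why the removed clause was dead code:

	/* before: 'pos' is unsigned and 'start_pos' is always 0, so
	 * 'pos < start_pos' reduces to 'pos < 0', which is never true
	 * for an unsigned value:
	 */
	if ((count == 0) || (pos < start_pos))
		return 0;

	/* after: only the meaningful early-out remains */
	if (!count)
		return 0;
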
Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-8-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 9647e7256179..1f50fdaf4c5a 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -927,9 +927,9 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, static inline int __copy_xstate_to_kernel(void *kbuf, const void *data, - unsigned int pos, unsigned int count, int start_pos, int end_pos) + unsigned int pos, unsigned int count, int end_pos) { - if ((count == 0) || (pos < start_pos)) + if (!count) return 0; if (end_pos < 0 || pos < end_pos) { @@ -972,7 +972,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po offset = offsetof(struct xregs_state, header); size = sizeof(header); - ret = __copy_xstate_to_kernel(kbuf, &header, offset, size, 0, count); + ret = __copy_xstate_to_kernel(kbuf, &header, offset, size, count); if (ret) return ret; @@ -986,7 +986,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po offset = xstate_offsets[i]; size = xstate_sizes[i]; - ret = __copy_xstate_to_kernel(kbuf, src, offset, size, 0, count); + ret = __copy_xstate_to_kernel(kbuf, src, offset, size, count); if (ret) return ret; @@ -1002,7 +1002,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po offset = offsetof(struct fxregs_state, sw_reserved); size = sizeof(xstate_fx_sw_bytes); - ret = __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, 0, count); + ret = __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, count); if (ret) return ret; @@ -1010,9 +1010,9 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po } static inline int -__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, unsigned int count, int start_pos, int end_pos) +__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, unsigned int count, int end_pos) { - if ((count == 0) || (pos < start_pos)) + if (!count) return 0; if (end_pos < 0 || pos < end_pos) { @@ -1055,7 +1055,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i offset = offsetof(struct xregs_state, header); size = sizeof(header); - ret = __copy_xstate_to_user(ubuf, &header, offset, size, 0, count); + ret = __copy_xstate_to_user(ubuf, &header, offset, size, count); if (ret) return ret; @@ -1069,7 +1069,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i offset = xstate_offsets[i]; size = xstate_sizes[i]; - ret = __copy_xstate_to_user(ubuf, src, offset, size, 0, count); + ret = __copy_xstate_to_user(ubuf, src, offset, size, count); if (ret) return ret; @@ -1085,7 +1085,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i offset = offsetof(struct fxregs_state, sw_reserved); size = sizeof(xstate_fx_sw_bytes); - ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, 0, count); + ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, count); if (ret) return ret; -- cgit From 
56583c9a1400fe1935edd55b24b4fbbc779b59cb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 14:59:51 +0200 Subject: x86/fpu: Clarify parameter names in the copy_xstate_to_*() methods Right now there's a confusing mixture of 'offset' and 'size' parameters: - __copy_xstate_to_*() input parameter 'end_pos' is not really an offset, but the full size of the copy to be performed. - input parameter 'count' to copy_xstate_to_*() shadows __copy_xstate_to_*()'s 'count' parameter - but the roles are different: the first one is the total number of bytes to be copied, while the second one is a partial copy size. To unconfuse all this, use a consistent set of parameter names: - 'size' is the partial copy size within a single xstate component - 'size_total' is the total copy requested - 'offset_start' is the requested starting offset. - 'offset' is the offset within an xstate component. No change in functionality. Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-9-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/xstate.h | 4 ++-- arch/x86/kernel/fpu/xstate.c | 44 +++++++++++++++++++-------------------- 2 files changed, 24 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index e4430b84939d..fed6617a1079 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -48,8 +48,8 @@ void fpu__xstate_clear_all_cpu_caps(void); void *get_xsave_addr(struct xregs_state *xsave, int xstate); const void *get_xsave_field_ptr(int xstate_field); int using_compacted_format(void); -int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int pos, unsigned int count); -int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int pos, unsigned int count); +int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); +int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave); #endif diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 1f50fdaf4c5a..c13083579655 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -927,15 +927,15 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, static inline int __copy_xstate_to_kernel(void *kbuf, const void *data, - unsigned int pos, unsigned int count, int end_pos) + unsigned int offset, unsigned int size, int size_total) { - if (!count) + if (!size) return 0; - if (end_pos < 0 || pos < end_pos) { - unsigned int copy = end_pos < 0 ? count : min(count, end_pos - pos); + if (size_total < 0 || offset < size_total) { + unsigned int copy = size_total < 0 ? size : min(size, size_total - offset); - memcpy(kbuf + pos, data, copy); + memcpy(kbuf + offset, data, copy); } return 0; } @@ -947,7 +947,7 @@ __copy_xstate_to_kernel(void *kbuf, * It supports partial copy but pos always starts from zero. This is called * from xstateregs_get() and there we check the CPU has XSAVES. 
*/ -int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int pos, unsigned int count) +int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total) { unsigned int offset, size; int ret, i; @@ -956,7 +956,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po /* * Currently copy_regset_to_user() starts from pos 0: */ - if (unlikely(pos != 0)) + if (unlikely(offset_start != 0)) return -EFAULT; /* @@ -972,7 +972,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po offset = offsetof(struct xregs_state, header); size = sizeof(header); - ret = __copy_xstate_to_kernel(kbuf, &header, offset, size, count); + ret = __copy_xstate_to_kernel(kbuf, &header, offset, size, size_total); if (ret) return ret; @@ -986,11 +986,11 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po offset = xstate_offsets[i]; size = xstate_sizes[i]; - ret = __copy_xstate_to_kernel(kbuf, src, offset, size, count); + ret = __copy_xstate_to_kernel(kbuf, src, offset, size, size_total); if (ret) return ret; - if (offset + size >= count) + if (offset + size >= size_total) break; } @@ -1002,7 +1002,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po offset = offsetof(struct fxregs_state, sw_reserved); size = sizeof(xstate_fx_sw_bytes); - ret = __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, count); + ret = __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total); if (ret) return ret; @@ -1010,15 +1010,15 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po } static inline int -__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, unsigned int count, int end_pos) +__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, int size_total) { - if (!count) + if (!size) return 0; - if (end_pos < 0 || pos < end_pos) { - unsigned int copy = end_pos < 0 ? count : min(count, end_pos - pos); + if (size_total < 0 || offset < size_total) { + unsigned int copy = size_total < 0 ? size : min(size, size_total - offset); - if (__copy_to_user(ubuf + pos, data, copy)) + if (__copy_to_user(ubuf + offset, data, copy)) return -EFAULT; } return 0; @@ -1030,7 +1030,7 @@ __copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, uns * zero. This is called from xstateregs_get() and there we check the CPU * has XSAVES. 
*/ -int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int pos, unsigned int count) +int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total) { unsigned int offset, size; int ret, i; @@ -1039,7 +1039,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i /* * Currently copy_regset_to_user() starts from pos 0: */ - if (unlikely(pos != 0)) + if (unlikely(offset_start != 0)) return -EFAULT; /* @@ -1055,7 +1055,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i offset = offsetof(struct xregs_state, header); size = sizeof(header); - ret = __copy_xstate_to_user(ubuf, &header, offset, size, count); + ret = __copy_xstate_to_user(ubuf, &header, offset, size, size_total); if (ret) return ret; @@ -1069,11 +1069,11 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i offset = xstate_offsets[i]; size = xstate_sizes[i]; - ret = __copy_xstate_to_user(ubuf, src, offset, size, count); + ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total); if (ret) return ret; - if (offset + size >= count) + if (offset + size >= size_total) break; } @@ -1085,7 +1085,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i offset = offsetof(struct fxregs_state, sw_reserved); size = sizeof(xstate_fx_sw_bytes); - ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, count); + ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, size_total); if (ret) return ret; -- cgit From 6ff15f8db7eaf29ef5ead6afbec9b25485fe8703 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 14:59:52 +0200 Subject: x86/fpu: Change 'size_total' parameter to unsigned and standardize the size checks in copy_xstate_to_*() 'size_total' is derived from an unsigned input parameter - and then converted to 'int' and checked for negative ranges: if (size_total < 0 || offset < size_total) { This conversion and the checks are unnecessary obfuscation, reject overly large requested copy sizes outright and simplify the underlying code. Reported-by: Rik van Riel Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-10-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index c13083579655..b18c5457065a 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -925,15 +925,11 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, * the source data pointer or increment pos, count, kbuf, and ubuf. */ static inline int -__copy_xstate_to_kernel(void *kbuf, - const void *data, - unsigned int offset, unsigned int size, int size_total) +__copy_xstate_to_kernel(void *kbuf, const void *data, + unsigned int offset, unsigned int size, unsigned int size_total) { - if (!size) - return 0; - - if (size_total < 0 || offset < size_total) { - unsigned int copy = size_total < 0 ? 
size : min(size, size_total - offset); + if (offset < size_total) { + unsigned int copy = min(size, size_total - offset); memcpy(kbuf + offset, data, copy); } @@ -986,12 +982,13 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of offset = xstate_offsets[i]; size = xstate_sizes[i]; + /* The next component has to fit fully into the output buffer: */ + if (offset + size > size_total) + break; + ret = __copy_xstate_to_kernel(kbuf, src, offset, size, size_total); if (ret) return ret; - - if (offset + size >= size_total) - break; } } @@ -1010,13 +1007,13 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of } static inline int -__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, int size_total) +__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, unsigned int size_total) { if (!size) return 0; - if (size_total < 0 || offset < size_total) { - unsigned int copy = size_total < 0 ? size : min(size, size_total - offset); + if (offset < size_total) { + unsigned int copy = min(size, size_total - offset); if (__copy_to_user(ubuf + offset, data, copy)) return -EFAULT; @@ -1069,12 +1066,13 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i offset = xstate_offsets[i]; size = xstate_sizes[i]; + /* The next component has to fit fully into the output buffer: */ + if (offset + size > size_total) + break; + ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total); if (ret) return ret; - - if (offset + size >= size_total) - break; } } -- cgit From 8c0817f4a3188ac5485ce14f96f12a175800b881 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 14:59:53 +0200 Subject: x86/fpu: Simplify __copy_xstate_to_kernel() return values __copy_xstate_to_kernel() can only return 0 (because kernel copies cannot fail), simplify the code throughout. No change in functionality. Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-11-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index b18c5457065a..00c3b41c3cf1 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -924,7 +924,7 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, * This is similar to user_regset_copyout(), but will not add offset to * the source data pointer or increment pos, count, kbuf, and ubuf. 
*/ -static inline int +static inline void __copy_xstate_to_kernel(void *kbuf, const void *data, unsigned int offset, unsigned int size, unsigned int size_total) { @@ -933,7 +933,6 @@ __copy_xstate_to_kernel(void *kbuf, const void *data, memcpy(kbuf + offset, data, copy); } - return 0; } /* @@ -946,8 +945,8 @@ __copy_xstate_to_kernel(void *kbuf, const void *data, int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total) { unsigned int offset, size; - int ret, i; struct xstate_header header; + int i; /* * Currently copy_regset_to_user() starts from pos 0: @@ -968,9 +967,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of offset = offsetof(struct xregs_state, header); size = sizeof(header); - ret = __copy_xstate_to_kernel(kbuf, &header, offset, size, size_total); - if (ret) - return ret; + __copy_xstate_to_kernel(kbuf, &header, offset, size, size_total); for (i = 0; i < XFEATURE_MAX; i++) { /* @@ -986,9 +983,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of if (offset + size > size_total) break; - ret = __copy_xstate_to_kernel(kbuf, src, offset, size, size_total); - if (ret) - return ret; + __copy_xstate_to_kernel(kbuf, src, offset, size, size_total); } } @@ -999,9 +994,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of offset = offsetof(struct fxregs_state, sw_reserved); size = sizeof(xstate_fx_sw_bytes); - ret = __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total); - if (ret) - return ret; + __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total); return 0; } -- cgit From 79fecc2b7506f29fb91becc65e8788e5ae7eba9f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 14:59:54 +0200 Subject: x86/fpu: Split copy_user_to_xstate() into copy_kernel_to_xstate() & copy_user_to_xstate() Similar to: x86/fpu: Split copy_xstate_to_user() into copy_xstate_to_kernel() & copy_xstate_to_user() No change in functionality. 
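The resulting caller-side dispatch, sketched from the xstateregs_set() hunk below: the regset core passes in either a kernel or a user buffer, and the caller picks the dedicated routine instead of relying on one function to branch internally (note that copy_kernel_to_xstate() still takes the unused 'ubuf' here; it is dropped in the next patch):

	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
		if (kbuf)
			ret = copy_kernel_to_xstate(kbuf, ubuf, xsave);	/* kernel-space source */
		else
			ret = copy_user_to_xstate(kbuf, ubuf, xsave);	/* user-space source */
	}
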
Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-12-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/xstate.h | 4 +-- arch/x86/kernel/fpu/regset.c | 10 ++++-- arch/x86/kernel/fpu/xstate.c | 66 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 74 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index fed6617a1079..79af79dbcab6 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -50,6 +50,6 @@ const void *get_xsave_field_ptr(int xstate_field); int using_compacted_format(void); int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); -int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, - struct xregs_state *xsave); +int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave); +int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave); #endif diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index ec1404194b65..cb45dd81d617 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -134,10 +134,14 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, fpu__activate_fpstate_write(fpu); - if (boot_cpu_has(X86_FEATURE_XSAVES)) - ret = copy_user_to_xstate(kbuf, ubuf, xsave); - else + if (boot_cpu_has(X86_FEATURE_XSAVES)) { + if (kbuf) + ret = copy_kernel_to_xstate(kbuf, ubuf, xsave); + else + ret = copy_user_to_xstate(kbuf, ubuf, xsave); + } else { ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + } /* * In case of failure, mark all states as init: diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 00c3b41c3cf1..1ad25d1b8056 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1084,7 +1084,71 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i } /* - * Convert from a ptrace standard-format buffer to kernel XSAVES format + * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format + * and copy to the target thread. This is called from xstateregs_set() and + * there we check the CPU has XSAVES and a whole standard-sized buffer + * exists. 
+ */ +int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf, + struct xregs_state *xsave) +{ + unsigned int offset, size; + int i; + u64 xfeatures; + u64 allowed_features; + + offset = offsetof(struct xregs_state, header); + size = sizeof(xfeatures); + + if (kbuf) { + memcpy(&xfeatures, kbuf + offset, size); + } else { + if (__copy_from_user(&xfeatures, ubuf + offset, size)) + return -EFAULT; + } + + /* + * Reject if the user sets any disabled or supervisor features: + */ + allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR; + + if (xfeatures & ~allowed_features) + return -EINVAL; + + for (i = 0; i < XFEATURE_MAX; i++) { + u64 mask = ((u64)1 << i); + + if (xfeatures & mask) { + void *dst = __raw_xsave_addr(xsave, 1 << i); + + offset = xstate_offsets[i]; + size = xstate_sizes[i]; + + if (kbuf) { + memcpy(dst, kbuf + offset, size); + } else { + if (__copy_from_user(dst, ubuf + offset, size)) + return -EFAULT; + } + } + } + + /* + * The state that came in from userspace was user-state only. + * Mask all the user states out of 'xfeatures': + */ + xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR; + + /* + * Add back in the features that came in from userspace: + */ + xsave->header.xfeatures |= xfeatures; + + return 0; +} + +/* + * Convert from a ptrace standard-format user-space buffer to kernel XSAVES format * and copy to the target thread. This is called from xstateregs_set() and * there we check the CPU has XSAVES and a whole standard-sized buffer * exists. -- cgit From 59dffa4edba1f15b2bfdbe608aca1efe664c674c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 14:59:55 +0200 Subject: x86/fpu: Remove 'ubuf' parameter from the copy_kernel_to_xstate() API No change in functionality. Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-13-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/xstate.h | 2 +- arch/x86/kernel/fpu/regset.c | 2 +- arch/x86/kernel/fpu/xstate.c | 17 +++-------------- 3 files changed, 5 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 79af79dbcab6..f10889bc0c88 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -50,6 +50,6 @@ const void *get_xsave_field_ptr(int xstate_field); int using_compacted_format(void); int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); -int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave); +int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave); int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave); #endif diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index cb45dd81d617..785302c75f38 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -136,7 +136,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, if (boot_cpu_has(X86_FEATURE_XSAVES)) { if (kbuf) - ret = copy_kernel_to_xstate(kbuf, ubuf, xsave); + ret = copy_kernel_to_xstate(kbuf, xsave); else ret = copy_user_to_xstate(kbuf, ubuf, 
xsave); } else { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 1ad25d1b8056..71cc8d367fdd 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1089,8 +1089,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i * there we check the CPU has XSAVES and a whole standard-sized buffer * exists. */ -int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf, - struct xregs_state *xsave) +int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave) { unsigned int offset, size; int i; @@ -1100,12 +1099,7 @@ int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf, offset = offsetof(struct xregs_state, header); size = sizeof(xfeatures); - if (kbuf) { - memcpy(&xfeatures, kbuf + offset, size); - } else { - if (__copy_from_user(&xfeatures, ubuf + offset, size)) - return -EFAULT; - } + memcpy(&xfeatures, kbuf + offset, size); /* * Reject if the user sets any disabled or supervisor features: @@ -1124,12 +1118,7 @@ int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf, offset = xstate_offsets[i]; size = xstate_sizes[i]; - if (kbuf) { - memcpy(dst, kbuf + offset, size); - } else { - if (__copy_from_user(dst, ubuf + offset, size)) - return -EFAULT; - } + memcpy(dst, kbuf + offset, size); } } -- cgit From 7b9094c688f807c110a2dab6f6edc5876bfa7b0b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 14:59:56 +0200 Subject: x86/fpu: Remove 'kbuf' parameter from the copy_user_to_xstate() API No change in functionality. Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-14-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/xstate.h | 2 +- arch/x86/kernel/fpu/regset.c | 2 +- arch/x86/kernel/fpu/signal.c | 11 ++++------- arch/x86/kernel/fpu/xstate.c | 19 +++++-------------- 4 files changed, 11 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index f10889bc0c88..4ceb90740d80 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -51,5 +51,5 @@ int using_compacted_format(void); int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave); -int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave); +int copy_user_to_xstate(const void __user *ubuf, struct xregs_state *xsave); #endif diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 785302c75f38..caf723f31737 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -138,7 +138,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, if (kbuf) ret = copy_kernel_to_xstate(kbuf, xsave); else - ret = copy_user_to_xstate(kbuf, ubuf, xsave); + ret = copy_user_to_xstate(ubuf, xsave); } else { ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); } diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index b1fe9a1fc4e0..2c685b492fd6 100644 --- a/arch/x86/kernel/fpu/signal.c +++ 
b/arch/x86/kernel/fpu/signal.c @@ -323,13 +323,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) */ fpu__drop(fpu); - if (using_compacted_format()) { - err = copy_user_to_xstate(NULL, buf_fx, - &fpu->state.xsave); - } else { - err = __copy_from_user(&fpu->state.xsave, - buf_fx, state_size); - } + if (using_compacted_format()) + err = copy_user_to_xstate(buf_fx, &fpu->state.xsave); + else + err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); if (err || __copy_from_user(&env, buf, sizeof(env))) { fpstate_init(&fpu->state); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 71cc8d367fdd..b1f3e4dae2e3 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1142,8 +1142,7 @@ int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave) * there we check the CPU has XSAVES and a whole standard-sized buffer * exists. */ -int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, - struct xregs_state *xsave) +int copy_user_to_xstate(const void __user *ubuf, struct xregs_state *xsave) { unsigned int offset, size; int i; @@ -1153,12 +1152,8 @@ int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, offset = offsetof(struct xregs_state, header); size = sizeof(xfeatures); - if (kbuf) { - memcpy(&xfeatures, kbuf + offset, size); - } else { - if (__copy_from_user(&xfeatures, ubuf + offset, size)) - return -EFAULT; - } + if (__copy_from_user(&xfeatures, ubuf + offset, size)) + return -EFAULT; /* * Reject if the user sets any disabled or supervisor features: @@ -1177,12 +1172,8 @@ int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, offset = xstate_offsets[i]; size = xstate_sizes[i]; - if (kbuf) { - memcpy(dst, kbuf + offset, size); - } else { - if (__copy_from_user(dst, ubuf + offset, size)) - return -EFAULT; - } + if (__copy_from_user(dst, ubuf + offset, size)) + return -EFAULT; } } -- cgit From 6d7f7da5533a3f841eeb1d9657257c9367924274 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 14:59:57 +0200 Subject: x86/fpu: Flip the parameter order in copy_*_to_xstate() Make it more consistent with regular memcpy() semantics, where the destination argument comes first. No change in functionality. 
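Side by side, the convention change amounts to the following (illustrative declarations only, gathered from the header hunk below):

    /* Before: source first. */
    /* int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave); */

    /* After: destination first, matching memcpy(dst, src, n). */
    int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
    int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);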
Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-15-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/xstate.h | 4 ++-- arch/x86/kernel/fpu/regset.c | 4 ++-- arch/x86/kernel/fpu/signal.c | 2 +- arch/x86/kernel/fpu/xstate.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 4ceb90740d80..579ac2358e63 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -50,6 +50,6 @@ const void *get_xsave_field_ptr(int xstate_field); int using_compacted_format(void); int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); -int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave); -int copy_user_to_xstate(const void __user *ubuf, struct xregs_state *xsave); +int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); +int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); #endif diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index caf723f31737..19a7385a912c 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -136,9 +136,9 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, if (boot_cpu_has(X86_FEATURE_XSAVES)) { if (kbuf) - ret = copy_kernel_to_xstate(kbuf, xsave); + ret = copy_kernel_to_xstate(xsave, kbuf); else - ret = copy_user_to_xstate(ubuf, xsave); + ret = copy_user_to_xstate(xsave, ubuf); } else { ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); } diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 2c685b492fd6..2d682dac35d4 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -324,7 +324,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) fpu__drop(fpu); if (using_compacted_format()) - err = copy_user_to_xstate(buf_fx, &fpu->state.xsave); + err = copy_user_to_xstate(&fpu->state.xsave, buf_fx); else err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index b1f3e4dae2e3..0ef35040d0ad 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1089,7 +1089,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i * there we check the CPU has XSAVES and a whole standard-sized buffer * exists. */ -int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave) +int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) { unsigned int offset, size; int i; @@ -1142,7 +1142,7 @@ int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave) * there we check the CPU has XSAVES and a whole standard-sized buffer * exists. 
*/ -int copy_user_to_xstate(const void __user *ubuf, struct xregs_state *xsave) +int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) { unsigned int offset, size; int i; -- cgit From b3a163081c28d1a4d1ad76259a9d93b34a82f1da Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 14:59:58 +0200 Subject: x86/fpu: Simplify fpu->fpregs_active use The fpregs_active() inline function is pretty pointless - in almost all the callsites it can be replaced with a direct fpu->fpregs_active access. Do so and eliminate the extra layer of obfuscation. Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-16-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/internal.h | 17 +---------------- arch/x86/kernel/fpu/core.c | 2 +- arch/x86/kernel/fpu/signal.c | 9 +++++---- arch/x86/mm/pkeys.c | 2 +- 4 files changed, 8 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 554cdb205d17..b223c57dd5dc 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -542,21 +542,6 @@ static inline void fpregs_activate(struct fpu *fpu) trace_x86_fpu_regs_activated(fpu); } -/* - * The question "does this thread have fpu access?" - * is slightly racy, since preemption could come in - * and revoke it immediately after the test. - * - * However, even in that very unlikely scenario, - * we can just assume we have FPU access - typically - * to save the FP state - we'll just take a #NM - * fault and get the FPU access back. - */ -static inline int fpregs_active(void) -{ - return current->thread.fpu.fpregs_active; -} - /* * FPU state switching for scheduling. * @@ -617,7 +602,7 @@ static inline void user_fpu_begin(void) struct fpu *fpu = ¤t->thread.fpu; preempt_disable(); - if (!fpregs_active()) + if (!fpu->fpregs_active) fpregs_activate(fpu); preempt_enable(); } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index e1114f070c2d..bad57248e5a0 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -367,7 +367,7 @@ void fpu__current_fpstate_write_end(void) * registers may still be out of date. Update them with * an XRSTOR if they are active. */ - if (fpregs_active()) + if (fpu->fpregs_active) copy_kernel_to_fpregs(&fpu->state); /* diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 2d682dac35d4..684025654d0c 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -155,7 +155,8 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) */ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) { - struct xregs_state *xsave = ¤t->thread.fpu.state.xsave; + struct fpu *fpu = ¤t->thread.fpu; + struct xregs_state *xsave = &fpu->state.xsave; struct task_struct *tsk = current; int ia32_fxstate = (buf != buf_fx); @@ -170,13 +171,13 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) sizeof(struct user_i387_ia32_struct), NULL, (struct _fpstate_32 __user *) buf) ? -1 : 1; - if (fpregs_active() || using_compacted_format()) { + if (fpu->fpregs_active || using_compacted_format()) { /* Save the live register state to the user directly. 
*/ if (copy_fpregs_to_sigframe(buf_fx)) return -1; /* Update the thread's fxstate to save the fsave header. */ if (ia32_fxstate) - copy_fxregs_to_kernel(&tsk->thread.fpu); + copy_fxregs_to_kernel(fpu); } else { /* * It is a *bug* if kernel uses compacted-format for xsave @@ -189,7 +190,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) return -1; } - fpstate_sanitize_xstate(&tsk->thread.fpu); + fpstate_sanitize_xstate(fpu); if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size)) return -1; } diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index 2dab69a706ec..e2c23472233e 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -45,7 +45,7 @@ int __execute_only_pkey(struct mm_struct *mm) */ preempt_disable(); if (!need_to_set_mm_pkey && - fpregs_active() && + current->thread.fpu.fpregs_active && !__pkru_allows_read(read_pkru(), execute_only_pkey)) { preempt_enable(); return execute_only_pkey; -- cgit From a10b6a16cdad88170f546d008c77453cddf918e6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 14:59:59 +0200 Subject: x86/fpu: Make the fpu state change in fpu__clear() scheduler-atomic Do this temporarily only, to make it easier to change the FPU state machine, in particular this change couples the fpu->fpregs_active and fpu->fpstate_active states: they are only set/cleared together (as far as the scheduler sees them). This will be removed by later patches. Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-17-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index bad57248e5a0..b7dc3833d41a 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -462,9 +462,11 @@ void fpu__clear(struct fpu *fpu) * Make sure fpstate is cleared and initialized. */ if (static_cpu_has(X86_FEATURE_FPU)) { + preempt_disable(); fpu__activate_curr(fpu); user_fpu_begin(); copy_init_fpstate_to_fpregs(); + preempt_enable(); } } -- cgit From b6aa85558d7e7b18fc3470d2bc1731d2205dd275 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 15:00:00 +0200 Subject: x86/fpu: Split the state handling in fpu__drop() Prepare fpu__drop() to use fpu->fpregs_active. There are two distinct usecases for fpu__drop() in this context: exit_thread() when called for 'current' in exit(), and when called for another task in fork(). This patch does not change behavior, it only adds a couple of debug checks and structures the code to make the ->fpregs_active change more obviously correct. All the complications will be removed later on. 
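A compact user-space model of the two call sites this patch separates, not part of the patch ('current_fpu' stands in for &current->thread.fpu; all names hypothetical):

    #include <assert.h>
    #include <stdbool.h>

    struct fpu_model { bool fpregs_active; bool fpstate_active; };

    static struct fpu_model current_fpu;     /* stands in for current->thread.fpu */

    static void fpu_drop_model(struct fpu_model *fpu)
    {
            if (fpu == &current_fpu) {
                    /* exit_thread() path: the registers may be live,
                     * so deactivate them before dropping the state. */
                    if (fpu->fpregs_active)
                            fpu->fpregs_active = false;  /* fpregs_deactivate() */
            } else {
                    /* fork() path: another task's registers must never
                     * be the live ones. */
                    assert(!fpu->fpregs_active);         /* WARN_ON_FPU() */
            }
            fpu->fpstate_active = false;
    }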
Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-18-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/core.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index b7dc3833d41a..815dfba7781a 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -414,12 +414,19 @@ void fpu__drop(struct fpu *fpu) { preempt_disable(); - if (fpu->fpregs_active) { - /* Ignore delayed exceptions from user space */ - asm volatile("1: fwait\n" - "2:\n" - _ASM_EXTABLE(1b, 2b)); - fpregs_deactivate(fpu); + if (fpu == ¤t->thread.fpu) { + WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active); + + if (fpu->fpregs_active) { + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" + "2:\n" + _ASM_EXTABLE(1b, 2b)); + if (fpu->fpregs_active) + fpregs_deactivate(fpu); + } + } else { + WARN_ON_FPU(fpu->fpregs_active); } fpu->fpstate_active = 0; -- cgit From f1c8cd0176078c7bcafdc89cac447cab672a0b5e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 15:00:01 +0200 Subject: x86/fpu: Change fpu->fpregs_active users to fpu->fpstate_active We want to simplify the FPU state machine by eliminating fpu->fpregs_active, and we can do that because the two state flags (::fpregs_active and ::fpstate_active) are set essentially together. The old lazy FPU switching code used to make a distinction - but there's no lazy switching code anymore, we always switch in an 'eager' fashion. Do this by first changing all substantial uses of fpu->fpregs_active to fpu->fpstate_active and adding a few debug checks to double check our assumption is correct. Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-19-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/internal.h | 4 +++- arch/x86/kernel/fpu/core.c | 16 ++++++++++------ arch/x86/kernel/fpu/signal.c | 4 +++- arch/x86/mm/pkeys.c | 3 +-- 4 files changed, 17 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index b223c57dd5dc..7fa676f93ac1 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -556,7 +556,9 @@ static inline void fpregs_activate(struct fpu *fpu) static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - if (old_fpu->fpregs_active) { + WARN_ON_FPU(old_fpu->fpregs_active != old_fpu->fpstate_active); + + if (old_fpu->fpstate_active) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 815dfba7781a..eab244622402 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -100,7 +100,7 @@ void __kernel_fpu_begin(void) kernel_fpu_disable(); - if (fpu->fpregs_active) { + if (fpu->fpstate_active) { /* * Ignore return value -- we don't care if reg state * is clobbered. 
@@ -116,7 +116,7 @@ void __kernel_fpu_end(void) { struct fpu *fpu = ¤t->thread.fpu; - if (fpu->fpregs_active) + if (fpu->fpstate_active) copy_kernel_to_fpregs(&fpu->state); kernel_fpu_enable(); @@ -147,8 +147,10 @@ void fpu__save(struct fpu *fpu) WARN_ON_FPU(fpu != ¤t->thread.fpu); preempt_disable(); + WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active); + trace_x86_fpu_before_save(fpu); - if (fpu->fpregs_active) { + if (fpu->fpstate_active) { if (!copy_fpregs_to_fpstate(fpu)) { copy_kernel_to_fpregs(&fpu->state); } @@ -262,11 +264,12 @@ EXPORT_SYMBOL_GPL(fpu__activate_curr); */ void fpu__activate_fpstate_read(struct fpu *fpu) { + WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active); /* * If fpregs are active (in the current CPU), then * copy them to the fpstate: */ - if (fpu->fpregs_active) { + if (fpu->fpstate_active) { fpu__save(fpu); } else { if (!fpu->fpstate_active) { @@ -362,12 +365,13 @@ void fpu__current_fpstate_write_end(void) { struct fpu *fpu = ¤t->thread.fpu; + WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active); /* * 'fpu' now has an updated copy of the state, but the * registers may still be out of date. Update them with * an XRSTOR if they are active. */ - if (fpu->fpregs_active) + if (fpu->fpstate_active) copy_kernel_to_fpregs(&fpu->state); /* @@ -417,7 +421,7 @@ void fpu__drop(struct fpu *fpu) if (fpu == ¤t->thread.fpu) { WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active); - if (fpu->fpregs_active) { + if (fpu->fpstate_active) { /* Ignore delayed exceptions from user space */ asm volatile("1: fwait\n" "2:\n" diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 684025654d0c..a88083ba7f8b 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -171,7 +171,9 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) sizeof(struct user_i387_ia32_struct), NULL, (struct _fpstate_32 __user *) buf) ? -1 : 1; - if (fpu->fpregs_active || using_compacted_format()) { + WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active); + + if (fpu->fpstate_active || using_compacted_format()) { /* Save the live register state to the user directly. */ if (copy_fpregs_to_sigframe(buf_fx)) return -1; diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index e2c23472233e..4d24269c071f 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -18,7 +18,6 @@ #include /* boot_cpu_has, ... */ #include /* vma_pkey() */ -#include /* fpregs_active() */ int __execute_only_pkey(struct mm_struct *mm) { @@ -45,7 +44,7 @@ int __execute_only_pkey(struct mm_struct *mm) */ preempt_disable(); if (!need_to_set_mm_pkey && - current->thread.fpu.fpregs_active && + current->thread.fpu.fpstate_active && !__pkru_allows_read(read_pkru(), execute_only_pkey)) { preempt_enable(); return execute_only_pkey; -- cgit From 6cf4edbe0526db311a28734609da888fdfcb3604 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 15:00:02 +0200 Subject: x86/fpu: Decouple fpregs_activate()/fpregs_deactivate() from fpu->fpregs_active The fpregs_activate()/fpregs_deactivate() are currently called in such a pattern: if (!fpu->fpregs_active) fpregs_activate(fpu); ... if (fpu->fpregs_active) fpregs_deactivate(fpu); But note that it's actually safe to call them without checking the flag first. This further decouples the fpu->fpregs_active flag from actual FPU logic. 
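In caller terms the simplification looks like this (user-space sketch, not part of the patch; the real helpers additionally update the per-CPU fpu_fpregs_owner_ctx pointer and fire tracepoints):

    #include <stdbool.h>

    struct fpu_model { bool fpregs_active; };

    /* Idempotent: setting an already-set flag is harmless. */
    static void fpregs_activate_model(struct fpu_model *fpu)
    {
            fpu->fpregs_active = true;
    }

    /* Before: callers guarded the transition on the flag. */
    static void user_fpu_begin_old(struct fpu_model *fpu)
    {
            if (!fpu->fpregs_active)
                    fpregs_activate_model(fpu);
    }

    /* After: the guard is gone, the call is unconditional. */
    static void user_fpu_begin_new(struct fpu_model *fpu)
    {
            fpregs_activate_model(fpu);
    }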
Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-20-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/internal.h | 7 +------ arch/x86/kernel/fpu/core.c | 3 +-- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 7fa676f93ac1..42a601673c09 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -526,8 +526,6 @@ static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu) */ static inline void fpregs_deactivate(struct fpu *fpu) { - WARN_ON_FPU(!fpu->fpregs_active); - fpu->fpregs_active = 0; this_cpu_write(fpu_fpregs_owner_ctx, NULL); trace_x86_fpu_regs_deactivated(fpu); @@ -535,8 +533,6 @@ static inline void fpregs_deactivate(struct fpu *fpu) static inline void fpregs_activate(struct fpu *fpu) { - WARN_ON_FPU(fpu->fpregs_active); - fpu->fpregs_active = 1; this_cpu_write(fpu_fpregs_owner_ctx, fpu); trace_x86_fpu_regs_activated(fpu); @@ -604,8 +600,7 @@ static inline void user_fpu_begin(void) struct fpu *fpu = ¤t->thread.fpu; preempt_disable(); - if (!fpu->fpregs_active) - fpregs_activate(fpu); + fpregs_activate(fpu); preempt_enable(); } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index eab244622402..01a47e9edfb4 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -426,8 +426,7 @@ void fpu__drop(struct fpu *fpu) asm volatile("1: fwait\n" "2:\n" _ASM_EXTABLE(1b, 2b)); - if (fpu->fpregs_active) - fpregs_deactivate(fpu); + fpregs_deactivate(fpu); } } else { WARN_ON_FPU(fpu->fpregs_active); -- cgit From 99dc26bda233ee722bbd370bddf20beece3ffb93 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 15:00:03 +0200 Subject: x86/fpu: Remove struct fpu::fpregs_active The previous changes paved the way for the removal of the fpu::fpregs_active state flag - we now only have the fpu::fpstate_active and fpu::last_cpu fields left. 
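With the flag gone, whether a task's registers are live is derived from CPU ownership plus ::last_cpu; a minimal model of the surviving check, mirroring the fpregs_state_valid() context visible in the hunk below (stand-in names):

    #include <stdbool.h>

    struct fpu_model { int last_cpu; unsigned char fpstate_active; };

    /* Stand-in for the per-CPU fpu_fpregs_owner_ctx pointer. */
    static struct fpu_model *fpregs_owner_ctx;

    static bool fpregs_state_valid_model(struct fpu_model *fpu, int cpu)
    {
            return fpu == fpregs_owner_ctx && cpu == fpu->last_cpu;
    }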
Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-21-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/internal.h | 5 ----- arch/x86/include/asm/fpu/types.h | 23 ----------------------- arch/x86/include/asm/trace/fpu.h | 5 +---- arch/x86/kernel/fpu/core.c | 9 --------- arch/x86/kernel/fpu/signal.c | 2 -- 5 files changed, 1 insertion(+), 43 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 42a601673c09..629e7abcd6c9 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -526,14 +526,12 @@ static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu) */ static inline void fpregs_deactivate(struct fpu *fpu) { - fpu->fpregs_active = 0; this_cpu_write(fpu_fpregs_owner_ctx, NULL); trace_x86_fpu_regs_deactivated(fpu); } static inline void fpregs_activate(struct fpu *fpu) { - fpu->fpregs_active = 1; this_cpu_write(fpu_fpregs_owner_ctx, fpu); trace_x86_fpu_regs_activated(fpu); } @@ -552,8 +550,6 @@ static inline void fpregs_activate(struct fpu *fpu) static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - WARN_ON_FPU(old_fpu->fpregs_active != old_fpu->fpstate_active); - if (old_fpu->fpstate_active) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; @@ -561,7 +557,6 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu) old_fpu->last_cpu = cpu; /* But leave fpu_fpregs_owner_ctx! */ - old_fpu->fpregs_active = 0; trace_x86_fpu_regs_deactivated(old_fpu); } else old_fpu->last_cpu = -1; diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 3c80f5b9c09d..0c314a397cf5 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -298,29 +298,6 @@ struct fpu { */ unsigned char fpstate_active; - /* - * @fpregs_active: - * - * This flag determines whether a given context is actively - * loaded into the FPU's registers and that those registers - * represent the task's current FPU state. - * - * Note the interaction with fpstate_active: - * - * # task does not use the FPU: - * fpstate_active == 0 - * - * # task uses the FPU and regs are active: - * fpstate_active == 1 && fpregs_active == 1 - * - * # the regs are inactive but still match fpstate: - * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu - * - * The third state is what we use for the lazy restore optimization - * on lazy-switching CPUs. 
- */ - unsigned char fpregs_active; - /* * @state: * diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h index 342e59789fcd..da565aae9fd2 100644 --- a/arch/x86/include/asm/trace/fpu.h +++ b/arch/x86/include/asm/trace/fpu.h @@ -12,7 +12,6 @@ DECLARE_EVENT_CLASS(x86_fpu, TP_STRUCT__entry( __field(struct fpu *, fpu) - __field(bool, fpregs_active) __field(bool, fpstate_active) __field(u64, xfeatures) __field(u64, xcomp_bv) @@ -20,16 +19,14 @@ DECLARE_EVENT_CLASS(x86_fpu, TP_fast_assign( __entry->fpu = fpu; - __entry->fpregs_active = fpu->fpregs_active; __entry->fpstate_active = fpu->fpstate_active; if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { __entry->xfeatures = fpu->state.xsave.header.xfeatures; __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; } ), - TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx", + TP_printk("x86/fpu: %p fpstate_active: %d xfeatures: %llx xcomp_bv: %llx", __entry->fpu, - __entry->fpregs_active, __entry->fpstate_active, __entry->xfeatures, __entry->xcomp_bv diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 01a47e9edfb4..93103a909c47 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -147,8 +147,6 @@ void fpu__save(struct fpu *fpu) WARN_ON_FPU(fpu != ¤t->thread.fpu); preempt_disable(); - WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active); - trace_x86_fpu_before_save(fpu); if (fpu->fpstate_active) { if (!copy_fpregs_to_fpstate(fpu)) { @@ -191,7 +189,6 @@ EXPORT_SYMBOL_GPL(fpstate_init); int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) { - dst_fpu->fpregs_active = 0; dst_fpu->last_cpu = -1; if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU)) @@ -264,7 +261,6 @@ EXPORT_SYMBOL_GPL(fpu__activate_curr); */ void fpu__activate_fpstate_read(struct fpu *fpu) { - WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active); /* * If fpregs are active (in the current CPU), then * copy them to the fpstate: @@ -365,7 +361,6 @@ void fpu__current_fpstate_write_end(void) { struct fpu *fpu = ¤t->thread.fpu; - WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active); /* * 'fpu' now has an updated copy of the state, but the * registers may still be out of date. Update them with @@ -419,8 +414,6 @@ void fpu__drop(struct fpu *fpu) preempt_disable(); if (fpu == ¤t->thread.fpu) { - WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active); - if (fpu->fpstate_active) { /* Ignore delayed exceptions from user space */ asm volatile("1: fwait\n" @@ -428,8 +421,6 @@ void fpu__drop(struct fpu *fpu) _ASM_EXTABLE(1b, 2b)); fpregs_deactivate(fpu); } - } else { - WARN_ON_FPU(fpu->fpregs_active); } fpu->fpstate_active = 0; diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index a88083ba7f8b..629106e51a29 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -171,8 +171,6 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) sizeof(struct user_i387_ia32_struct), NULL, (struct _fpstate_32 __user *) buf) ? -1 : 1; - WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active); - if (fpu->fpstate_active || using_compacted_format()) { /* Save the live register state to the user directly. 
*/ if (copy_fpregs_to_sigframe(buf_fx)) -- cgit From 0852b374173bb57f870d78e6c6839c77b339be5f Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Sat, 23 Sep 2017 15:00:04 +0200 Subject: x86/fpu: Add FPU state copying quirk to handle XRSTOR failure on Intel Skylake CPUs On Skylake CPUs I noticed that XRSTOR is unable to deal with states created by copyout_from_xsaves() if the xstate has only SSE/YMM state, and no FP state. That is, xfeatures had XFEATURE_MASK_SSE set, but not XFEATURE_MASK_FP. The reason is that part of the SSE/YMM state lives in the MXCSR and MXCSR_FLAGS fields of the FP state. Ensure that whenever we copy SSE or YMM state around, the MXCSR and MXCSR_FLAGS fields are also copied around. Signed-off-by: Rik van Riel Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170210085445.0f1cc708@annuminas.surriel.com Link: http://lkml.kernel.org/r/20170923130016.21448-22-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/types.h | 3 +++ arch/x86/kernel/fpu/xstate.c | 42 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 0c314a397cf5..71db45ca8870 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -68,6 +68,9 @@ struct fxregs_state { /* Default value for fxregs_state.mxcsr: */ #define MXCSR_DEFAULT 0x1f80 +/* Copy both mxcsr & mxcsr_flags with a single u64 memcpy: */ +#define MXCSR_AND_FLAGS_SIZE sizeof(u64) + /* * Software based FPU emulation state. This is arbitrary really, * it matches the x87 format to make it easier to understand: diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 0ef35040d0ad..41c52256bdce 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -920,6 +920,23 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, } #endif /* ! CONFIG_ARCH_HAS_PKEYS */ +/* + * Weird legacy quirk: SSE and YMM states store information in the + * MXCSR and MXCSR_FLAGS fields of the FP area. That means if the FP + * area is marked as unused in the xfeatures header, we need to copy + * MXCSR and MXCSR_FLAGS if either SSE or YMM are in use. + */ +static inline bool xfeatures_mxcsr_quirk(u64 xfeatures) +{ + if (!(xfeatures & (XFEATURE_MASK_SSE|XFEATURE_MASK_YMM))) + return 0; + + if (xfeatures & XFEATURE_MASK_FP) + return 0; + + return 1; +} + /* * This is similar to user_regset_copyout(), but will not add offset to * the source data pointer or increment pos, count, kbuf, and ubuf. 
@@ -988,6 +1005,12 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of } + if (xfeatures_mxcsr_quirk(header.xfeatures)) { + offset = offsetof(struct fxregs_state, mxcsr); + size = MXCSR_AND_FLAGS_SIZE; + __copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total); + } + /* * Fill xsave->i387.sw_reserved value for ptrace frame: */ @@ -1070,6 +1093,12 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i } + if (xfeatures_mxcsr_quirk(header.xfeatures)) { + offset = offsetof(struct fxregs_state, mxcsr); + size = MXCSR_AND_FLAGS_SIZE; + __copy_xstate_to_user(ubuf, &xsave->i387.mxcsr, offset, size, size_total); + } + /* * Fill xsave->i387.sw_reserved value for ptrace frame: */ @@ -1122,6 +1151,12 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) } } + if (xfeatures_mxcsr_quirk(xfeatures)) { + offset = offsetof(struct fxregs_state, mxcsr); + size = MXCSR_AND_FLAGS_SIZE; + memcpy(&xsave->i387.mxcsr, kbuf + offset, size); + } + /* * The state that came in from userspace was user-state only. * Mask all the user states out of 'xfeatures': @@ -1177,6 +1212,13 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) } } + if (xfeatures_mxcsr_quirk(xfeatures)) { + offset = offsetof(struct fxregs_state, mxcsr); + size = MXCSR_AND_FLAGS_SIZE; + if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size)) + return -EFAULT; + } + /* * The state that came in from userspace was user-state only. * Mask all the user states out of 'xfeatures': -- cgit From 4f8cef59bad29344aca0e2e6b0ad18dadd078fd0 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Sat, 23 Sep 2017 15:00:05 +0200 Subject: x86/fpu: Fix boolreturn.cocci warnings arch/x86/kernel/fpu/xstate.c:931:9-10: WARNING: return of 0/1 in function 'xfeatures_mxcsr_quirk' with return type bool Return statements in functions returning bool should use true/false instead of 1/0. Generated by: scripts/coccinelle/misc/boolreturn.cocci Signed-off-by: Fengguang Wu Signed-off-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Yu-cheng Yu Cc: kbuild-all@01.org Cc: tipbuild@zytor.com Link: http://lkml.kernel.org/r/20170306004553.GA25764@lkp-wsm-ep1 Link: http://lkml.kernel.org/r/20170923130016.21448-23-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 41c52256bdce..fda1109cc355 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -929,12 +929,12 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, static inline bool xfeatures_mxcsr_quirk(u64 xfeatures) { if (!(xfeatures & (XFEATURE_MASK_SSE|XFEATURE_MASK_YMM))) - return 0; + return false; if (xfeatures & XFEATURE_MASK_FP) - return 0; + return false; - return 1; + return true; } /* -- cgit From 03eaec81ac09814817e9f0307d572ffe8365f980 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 23 Sep 2017 15:00:06 +0200 Subject: x86/fpu: Turn WARN_ON() in context switch into WARN_ON_FPU() copy_xregs_to_kernel checks if the alternatives have been already patched. This WARN_ON() is always executed in every context switch. All the other checks in fpu internal.h are WARN_ON_FPU(), but this one is plain WARN_ON(). 
I assume it was forgotten to switch it. So switch it to WARN_ON_FPU() too to avoid some unnecessary code in the context switch, and a potentially expensive cache line miss for the global variable. Signed-off-by: Andi Kleen Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170329062605.4970-1-andi@firstfloor.org Link: http://lkml.kernel.org/r/20170923130016.21448-24-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 629e7abcd6c9..2dca7c65319c 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -350,7 +350,7 @@ static inline void copy_xregs_to_kernel(struct xregs_state *xstate) u32 hmask = mask >> 32; int err; - WARN_ON(!alternatives_patched); + WARN_ON_FPU(!alternatives_patched); XSTATE_XSAVE(xstate, lmask, hmask, err); -- cgit From 814fb7bb7db5433757d76f4c4502c96fc53b0b5e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 23 Sep 2017 15:00:07 +0200 Subject: x86/fpu: Don't let userspace set bogus xcomp_bv On x86, userspace can use the ptrace() or rt_sigreturn() system calls to set a task's extended state (xstate) or "FPU" registers. ptrace() can set them for another task using the PTRACE_SETREGSET request with NT_X86_XSTATE, while rt_sigreturn() can set them for the current task. In either case, registers can be set to any value, but the kernel assumes that the XSAVE area itself remains valid in the sense that the CPU can restore it. However, in the case where the kernel is using the uncompacted xstate format (which it does whenever the XSAVES instruction is unavailable), it was possible for userspace to set the xcomp_bv field in the xstate_header to an arbitrary value. However, all bits in that field are reserved in the uncompacted case, so when switching to a task with nonzero xcomp_bv, the XRSTOR instruction failed with a #GP fault. This caused the WARN_ON_FPU(err) in copy_kernel_to_xregs() to be hit. In addition, since the error is otherwise ignored, the FPU registers from the task previously executing on the CPU were leaked. Fix the bug by checking that the user-supplied value of xcomp_bv is 0 in the uncompacted case, and returning an error otherwise. The reason for validating xcomp_bv rather than simply overwriting it with 0 is that we want userspace to see an error if it (incorrectly) provides an XSAVE area in compacted format rather than in uncompacted format. Note that as before, in case of error we clear the task's FPU state. This is perhaps non-ideal, especially for PTRACE_SETREGSET; it might be better to return an error before changing anything. But it seems the "clear on error" behavior is fine for now, and it's a little tricky to do otherwise because it would mean we couldn't simply copy the full userspace state into kernel memory in one __copy_from_user(). 
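The invariant being enforced is small enough to demonstrate stand-alone (a sketch, not part of the patch; struct layout per the 64-byte XSAVE header, model names hypothetical):

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    /* In the uncompacted XSAVE format every bit of xcomp_bv is
     * reserved, so user-supplied state must have it zeroed. */
    struct xstate_header_model {
            uint64_t xfeatures;
            uint64_t xcomp_bv;
            uint64_t reserved[6];
    };

    static int check_uncompacted(const struct xstate_header_model *hdr)
    {
            return hdr->xcomp_bv ? -EINVAL : 0;
    }

    int main(void)
    {
            struct xstate_header_model ok  = { .xfeatures = 0x7 };
            struct xstate_header_model bad = { .xfeatures = 0x7, .xcomp_bv = 1 };

            printf("zero xcomp_bv:    %d\n", check_uncompacted(&ok));  /* 0 */
            printf("nonzero xcomp_bv: %d\n", check_uncompacted(&bad)); /* -EINVAL */
            return 0;
    }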
This bug was found by syzkaller, which hit the above-mentioned WARN_ON_FPU(): WARNING: CPU: 1 PID: 0 at ./arch/x86/include/asm/fpu/internal.h:373 __switch_to+0x5b5/0x5d0 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.13.0 #453 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff9ba2bc8e42c0 task.stack: ffffa78cc036c000 RIP: 0010:__switch_to+0x5b5/0x5d0 RSP: 0000:ffffa78cc08bbb88 EFLAGS: 00010082 RAX: 00000000fffffffe RBX: ffff9ba2b8bf2180 RCX: 00000000c0000100 RDX: 00000000ffffffff RSI: 000000005cb10700 RDI: ffff9ba2b8bf36c0 RBP: ffffa78cc08bbbd0 R08: 00000000929fdf46 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000000 R12: ffff9ba2bc8e42c0 R13: 0000000000000000 R14: ffff9ba2b8bf3680 R15: ffff9ba2bf5d7b40 FS: 00007f7e5cb10700(0000) GS:ffff9ba2bf400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004005cc CR3: 0000000079fd5000 CR4: 00000000001406e0 Call Trace: Code: 84 00 00 00 00 00 e9 11 fd ff ff 0f ff 66 0f 1f 84 00 00 00 00 00 e9 e7 fa ff ff 0f ff 66 0f 1f 84 00 00 00 00 00 e9 c2 fa ff ff <0f> ff 66 0f 1f 84 00 00 00 00 00 e9 d4 fc ff ff 66 66 2e 0f 1f Here is a C reproducer. The expected behavior is that the program spin forever with no output. However, on a buggy kernel running on a processor with the "xsave" feature but without the "xsaves" feature (e.g. Sandy Bridge through Broadwell for Intel), within a second or two the program reports that the xmm registers were corrupted, i.e. were not restored correctly. With CONFIG_X86_DEBUG_FPU=y it also hits the above kernel warning. #define _GNU_SOURCE #include #include #include #include #include #include #include #include int main(void) { int pid = fork(); uint64_t xstate[512]; struct iovec iov = { .iov_base = xstate, .iov_len = sizeof(xstate) }; if (pid == 0) { bool tracee = true; for (int i = 0; i < sysconf(_SC_NPROCESSORS_ONLN) && tracee; i++) tracee = (fork() != 0); uint32_t xmm0[4] = { [0 ... 3] = tracee ? 0x00000000 : 0xDEADBEEF }; asm volatile(" movdqu %0, %%xmm0\n" " mov %0, %%rbx\n" "1: movdqu %%xmm0, %0\n" " mov %0, %%rax\n" " cmp %%rax, %%rbx\n" " je 1b\n" : "+m" (xmm0) : : "rax", "rbx", "xmm0"); printf("BUG: xmm registers corrupted! tracee=%d, xmm0=%08X%08X%08X%08X\n", tracee, xmm0[0], xmm0[1], xmm0[2], xmm0[3]); } else { usleep(100000); ptrace(PTRACE_ATTACH, pid, 0, 0); wait(NULL); ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov); xstate[65] = -1; ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov); ptrace(PTRACE_CONT, pid, 0, 0); wait(NULL); } return 1; } Note: the program only tests for the bug using the ptrace() system call. The bug can also be reproduced using the rt_sigreturn() system call, but only when called from a 32-bit program, since for 64-bit programs the kernel restores the FPU state from the signal frame by doing XRSTOR directly from userspace memory (with proper error checking). 
Reported-by: Dmitry Vyukov Signed-off-by: Eric Biggers Reviewed-by: Kees Cook Reviewed-by: Rik van Riel Acked-by: Dave Hansen Cc: [v3.17+] Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Eric Biggers Cc: Fenghua Yu Cc: Kevin Hao Cc: Linus Torvalds Cc: Michael Halcrow Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Wanpeng Li Cc: Yu-cheng Yu Cc: kernel-hardening@lists.openwall.com Fixes: 0b29643a5843 ("x86/xsaves: Change compacted format xsave area header") Link: http://lkml.kernel.org/r/20170922174156.16780-2-ebiggers3@gmail.com Link: http://lkml.kernel.org/r/20170923130016.21448-25-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/regset.c | 4 ++++ arch/x86/kernel/fpu/signal.c | 9 +++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 19a7385a912c..c764f7405322 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -141,6 +141,10 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, ret = copy_user_to_xstate(xsave, ubuf); } else { ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + + /* xcomp_bv must be 0 when using uncompacted format */ + if (!ret && xsave->header.xcomp_bv) + ret = -EINVAL; } /* diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 629106e51a29..da68ea1c3a44 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -324,11 +324,16 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) */ fpu__drop(fpu); - if (using_compacted_format()) + if (using_compacted_format()) { err = copy_user_to_xstate(&fpu->state.xsave, buf_fx); - else + } else { err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); + /* xcomp_bv must be 0 when using uncompacted format */ + if (!err && state_size > offsetof(struct xregs_state, header) && fpu->state.xsave.header.xcomp_bv) + err = -EINVAL; + } + if (err || __copy_from_user(&env, buf, sizeof(env))) { fpstate_init(&fpu->state); trace_x86_fpu_init_state(fpu); -- cgit From d5c8028b4788f62b31fb79a331b3ad3e041fa366 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 23 Sep 2017 15:00:09 +0200 Subject: x86/fpu: Reinitialize FPU registers if restoring FPU state fails Userspace can change the FPU state of a task using the ptrace() or rt_sigreturn() system calls. Because reserved bits in the FPU state can cause the XRSTOR instruction to fail, the kernel has to carefully validate that no reserved bits or other invalid values are being set. Unfortunately, there have been bugs in this validation code. For example, we were not checking that the 'xcomp_bv' field in the xstate_header was 0. As-is, such bugs are exploitable to read the FPU registers of other processes on the system. To do so, an attacker can create a task, assign to it an invalid FPU state, then spin in a loop and monitor the values of the FPU registers. Because the task's FPU registers are not being restored, sometimes the FPU registers will have the values from another process. This is likely to continue to be a problem in the future because the validation done by the CPU instructions like XRSTOR is not immediately visible to kernel developers. Nor will invalid FPU states ever be encountered during ordinary use --- they will only be seen during fuzzing or exploits. There can even be reserved bits outside the xstate_header which are easy to forget about. 
For example, the MXCSR register contains reserved bits, which were not validated by the KVM_SET_XSAVE ioctl until commit a575813bfe4b ("KVM: x86: Fix load damaged SSEx MXCSR register"). Therefore, mitigate this class of vulnerability by restoring the FPU registers from init_fpstate if restoring from the task's state fails. We actually used to do this, but it was (perhaps unwisely) removed by commit 9ccc27a5d297 ("x86/fpu: Remove error return values from copy_kernel_to_*regs() functions"). This new patch is also a bit different. First, it only clears the registers, not also the bad in-memory state; this is simpler and makes it easier to make the mitigation cover all callers of __copy_kernel_to_fpregs(). Second, it does the register clearing in an exception handler so that no extra instructions are added to context switches. In fact, we *remove* instructions, since previously we were always zeroing the register containing 'err' even if CONFIG_X86_DEBUG_FPU was disabled. Signed-off-by: Eric Biggers Reviewed-by: Rik van Riel Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Dmitry Vyukov Cc: Eric Biggers Cc: Fenghua Yu Cc: Kevin Hao Cc: Linus Torvalds Cc: Michael Halcrow Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Wanpeng Li Cc: Yu-cheng Yu Cc: kernel-hardening@lists.openwall.com Link: http://lkml.kernel.org/r/20170922174156.16780-4-ebiggers3@gmail.com Link: http://lkml.kernel.org/r/20170923130016.21448-27-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/internal.h | 51 +++++++++++-------------------------- arch/x86/mm/extable.c | 24 +++++++++++++++++ 2 files changed, 39 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 2dca7c65319c..cf290d424e48 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -120,20 +120,11 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu); err; \ }) -#define check_insn(insn, output, input...) \ -({ \ - int err; \ +#define kernel_insn(insn, output, input...) \ asm volatile("1:" #insn "\n\t" \ "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err), output \ - : "0"(0), input); \ - err; \ -}) + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \ + : output : input) static inline int copy_fregs_to_user(struct fregs_state __user *fx) { @@ -153,20 +144,16 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx) static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) { - int err; - if (IS_ENABLED(CONFIG_X86_32)) { - err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } else { if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) { - err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); + kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } else { /* See comment in copy_fxregs_to_kernel() below. 
*/ - err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx)); + kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx)); } } - /* Copying from a kernel buffer to FPU registers should never fail: */ - WARN_ON_FPU(err); } static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) @@ -183,9 +170,7 @@ static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) static inline void copy_kernel_to_fregs(struct fregs_state *fx) { - int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - - WARN_ON_FPU(err); + kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } static inline int copy_user_to_fregs(struct fregs_state __user *fx) @@ -281,18 +266,13 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact * XSAVE area format. */ -#define XSTATE_XRESTORE(st, lmask, hmask, err) \ +#define XSTATE_XRESTORE(st, lmask, hmask) \ asm volatile(ALTERNATIVE(XRSTOR, \ XRSTORS, X86_FEATURE_XSAVES) \ "\n" \ - "xor %[err], %[err]\n" \ "3:\n" \ - ".pushsection .fixup,\"ax\"\n" \ - "4: movl $-2, %[err]\n" \ - "jmp 3b\n" \ - ".popsection\n" \ - _ASM_EXTABLE(661b, 4b) \ - : [err] "=r" (err) \ + _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\ + : \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") @@ -336,7 +316,10 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) else XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); - /* We should never fault when copying from a kernel buffer: */ + /* + * We should never fault when copying from a kernel buffer, and the FPU + * state we set at boot time should be valid. + */ WARN_ON_FPU(err); } @@ -365,12 +348,8 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) { u32 lmask = mask; u32 hmask = mask >> 32; - int err; - - XSTATE_XRESTORE(xstate, lmask, hmask, err); - /* We should never fault when copying from a kernel buffer: */ - WARN_ON_FPU(err); + XSTATE_XRESTORE(xstate, lmask, hmask); } /* diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index c076f710de4c..c3521e2be396 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -78,6 +79,29 @@ bool ex_handler_refcount(const struct exception_table_entry *fixup, } EXPORT_SYMBOL_GPL(ex_handler_refcount); +/* + * Handler for when we fail to restore a task's FPU state. We should never get + * here because the FPU state of a task using the FPU (task->thread.fpu.state) + * should always be valid. However, past bugs have allowed userspace to set + * reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn(). + * These caused XRSTOR to fail when switching to the task, leaking the FPU + * registers of the task previously executing on the CPU. Mitigate this class + * of vulnerability by restoring from the initial state (essentially, zeroing + * out all the FPU registers) if we can't restore from the task's FPU state. 
+ */
+bool ex_handler_fprestore(const struct exception_table_entry *fixup,
+			  struct pt_regs *regs, int trapnr)
+{
+	regs->ip = ex_fixup_addr(fixup);
+
+	WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
+		  (void *)instruction_pointer(regs));
+
+	__copy_kernel_to_fpregs(&init_fpstate, -1);
+	return true;
+}
+EXPORT_SYMBOL_GPL(ex_handler_fprestore);
+
 bool ex_handler_ext(const struct exception_table_entry *fixup,
 		    struct pt_regs *regs, int trapnr)

-- cgit

From a3c4fb7c9c2ebfd50b8c60f6c069932bb319bc37 Mon Sep 17 00:00:00 2001
From: Laurent Dufour
Date: Mon, 4 Sep 2017 10:32:15 +0200
Subject: x86/mm: Fix fault error path using unsafe vma pointer

commit 7b2d0dbac489 ("x86/mm/pkeys: Pass VMA down in to fault signal
generation code") passes down a vma pointer to the error path, but that
is done once the mmap_sem is released when calling mm_fault_error()
from __do_page_fault().

This is dangerous, as the vma structure is no longer safe to use once
the mmap_sem has been released.

As only the protection key value is required in the error processing,
we can just pass down this value.

Fix it by passing a pointer to a protection key value down to the fault
signal generation code. The use of a pointer allows keeping the check
that generates a warning message in fill_sig_info_pkey() when the vma
was not known. If the pointer is valid, the protection key value can be
accessed by dereferencing the pointer.

[ tglx: Made *pkey u32 as that's the type which is passed in siginfo ]

Fixes: 7b2d0dbac489 ("x86/mm/pkeys: Pass VMA down in to fault signal generation code")
Signed-off-by: Laurent Dufour
Signed-off-by: Thomas Gleixner
Cc: linux-mm@kvack.org
Cc: Dave Hansen
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1504513935-12742-1-git-send-email-ldufour@linux.vnet.ibm.com
---
 arch/x86/mm/fault.c | 47 ++++++++++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 23 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 39567b5c33da..e2baeaa053a5 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -192,8 +192,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
 * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
 * faulted on a pte with its pkey=4.
 */
-static void fill_sig_info_pkey(int si_code, siginfo_t *info,
-		struct vm_area_struct *vma)
+static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
 {
 	/* This is effectively an #ifdef */
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
@@ -209,7 +208,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
 	 * valid VMA, so we should never reach this without a
 	 * valid VMA.
 	 */
-	if (!vma) {
+	if (!pkey) {
 		WARN_ONCE(1, "PKU fault with no VMA passed in");
 		info->si_pkey = 0;
 		return;
@@ -219,13 +218,12 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
 	 * absolutely guranteed to be 100% accurate because of
 	 * the race explained above.
*/ - info->si_pkey = vma_pkey(vma); + info->si_pkey = *pkey; } static void force_sig_info_fault(int si_signo, int si_code, unsigned long address, - struct task_struct *tsk, struct vm_area_struct *vma, - int fault) + struct task_struct *tsk, u32 *pkey, int fault) { unsigned lsb = 0; siginfo_t info; @@ -240,7 +238,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, lsb = PAGE_SHIFT; info.si_addr_lsb = lsb; - fill_sig_info_pkey(si_code, &info, vma); + fill_sig_info_pkey(si_code, &info, pkey); force_sig_info(si_signo, &info, tsk); } @@ -762,8 +760,6 @@ no_context(struct pt_regs *regs, unsigned long error_code, struct task_struct *tsk = current; unsigned long flags; int sig; - /* No context means no VMA to pass down */ - struct vm_area_struct *vma = NULL; /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs, X86_TRAP_PF)) { @@ -788,7 +784,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, /* XXX: hwpoison faults will set the wrong code. */ force_sig_info_fault(signal, si_code, address, - tsk, vma, 0); + tsk, NULL, 0); } /* @@ -896,8 +892,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, static void __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, - unsigned long address, struct vm_area_struct *vma, - int si_code) + unsigned long address, u32 *pkey, int si_code) { struct task_struct *tsk = current; @@ -945,7 +940,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_PF; - force_sig_info_fault(SIGSEGV, si_code, address, tsk, vma, 0); + force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0); return; } @@ -958,9 +953,9 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, static noinline void bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, - unsigned long address, struct vm_area_struct *vma) + unsigned long address, u32 *pkey) { - __bad_area_nosemaphore(regs, error_code, address, vma, SEGV_MAPERR); + __bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR); } static void @@ -968,6 +963,10 @@ __bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address, struct vm_area_struct *vma, int si_code) { struct mm_struct *mm = current->mm; + u32 pkey; + + if (vma) + pkey = vma_pkey(vma); /* * Something tried to access memory that isn't in our memory map.. @@ -975,7 +974,8 @@ __bad_area(struct pt_regs *regs, unsigned long error_code, */ up_read(&mm->mmap_sem); - __bad_area_nosemaphore(regs, error_code, address, vma, si_code); + __bad_area_nosemaphore(regs, error_code, address, + (vma) ? 
&pkey : NULL, si_code);
 }

 static noinline void
@@ -1018,7 +1018,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,

 static void
 do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
-	  struct vm_area_struct *vma, unsigned int fault)
+	  u32 *pkey, unsigned int fault)
 {
 	struct task_struct *tsk = current;
 	int code = BUS_ADRERR;
@@ -1045,13 +1045,12 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
 		code = BUS_MCEERR_AR;
 	}
 #endif
-	force_sig_info_fault(SIGBUS, code, address, tsk, vma, fault);
+	force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault);
 }

 static noinline void
 mm_fault_error(struct pt_regs *regs, unsigned long error_code,
-	       unsigned long address, struct vm_area_struct *vma,
-	       unsigned int fault)
+	       unsigned long address, u32 *pkey, unsigned int fault)
 {
 	if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
 		no_context(regs, error_code, address, 0, 0);
@@ -1075,9 +1074,9 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 	} else {
 		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
 			     VM_FAULT_HWPOISON_LARGE))
-			do_sigbus(regs, error_code, address, vma, fault);
+			do_sigbus(regs, error_code, address, pkey, fault);
 		else if (fault & VM_FAULT_SIGSEGV)
-			bad_area_nosemaphore(regs, error_code, address, vma);
+			bad_area_nosemaphore(regs, error_code, address, pkey);
 		else
 			BUG();
 	}
@@ -1267,6 +1266,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	struct mm_struct *mm;
 	int fault, major = 0;
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+	u32 pkey;

 	tsk = current;
 	mm = tsk->mm;
@@ -1467,9 +1467,10 @@ good_area:
 		return;
 	}

+	pkey = vma_pkey(vma);
 	up_read(&mm->mmap_sem);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
-		mm_fault_error(regs, error_code, address, vma, fault);
+		mm_fault_error(regs, error_code, address, &pkey, fault);
 		return;
 	}

-- cgit

From 7d7099433d9eaaa5a989a55f1fa354c16a3ad297 Mon Sep 17 00:00:00 2001
From: Sean Fu
Date: Mon, 11 Sep 2017 08:33:21 +0800
Subject: x86/sysfs: Fix off-by-one error in loop termination

An off-by-one error in the loop termination condition in
create_setup_data_nodes() leads to a memory leak when
create_setup_data_node() fails.

Signed-off-by: Sean Fu
Signed-off-by: Thomas Gleixner
Link: http://lkml.kernel.org/r/1505090001-1157-1-git-send-email-fxinrong@gmail.com
---
 arch/x86/kernel/ksysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index 4b0592ca9e47..8c1cc08f514f 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -299,7 +299,7 @@ static int __init create_setup_data_nodes(struct kobject *parent)
 	return 0;

 out_clean_nodes:
-	for (j = i - 1; j > 0; j--)
+	for (j = i - 1; j >= 0; j--)
 		cleanup_setup_data_node(*(kobjp + j));
 	kfree(kobjp);
 out_setup_data_kobj:

-- cgit

From 5ac751d9e6b187c4a0000879d6598eb2292db949 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä
Date: Tue, 12 Sep 2017 19:40:00 +0300
Subject: x86: Don't cast away the __user in __get_user_asm_u64()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Don't cast away the __user in __get_user_asm_u64() on x86-32.
Prevents sparse getting upset.
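To illustrate what sparse checks here, a minimal standalone sketch
(hypothetical code, not from the tree; under sparse, __user expands to
an address-space attribute roughly as in include/linux/compiler.h):

	#ifdef __CHECKER__
	# define __user __attribute__((noderef, address_space(1)))
	#else
	# define __user
	#endif

	typedef unsigned int u32;
	typedef unsigned long long u64;

	/* Return a pointer to the second 32-bit half of a user-space u64. */
	static inline u32 __user *u64_high_half(u64 __user *p)
	{
		/*
		 * A plain (u32 *) cast would drop the __user annotation and
		 * trigger a sparse address-space warning; this cast keeps
		 * the annotation intact.
		 */
		return (u32 __user *)p + 1;
	}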
Signed-off-by: Ville Syrjälä
Signed-off-by: Thomas Gleixner
Cc: Benjamin LaHaise
Cc: Linus Torvalds
Link: http://lkml.kernel.org/r/20170912164000.13745-1-ville.syrjala@linux.intel.com
---
 arch/x86/include/asm/uaccess.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 78e8fcc87d4c..4b892917edeb 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -337,7 +337,7 @@ do {								\
 		     _ASM_EXTABLE(1b, 4b)			\
 		     _ASM_EXTABLE(2b, 4b)			\
 		     : "=r" (retval), "=&A"(x)			\
-		     : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1),	\
+		     : "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1),	\
 		       "i" (errret), "0" (retval));		\
 })

-- cgit

From b09c146f8f63c0e03adba74df76bf9c2be466fec Mon Sep 17 00:00:00 2001
From: Kan Liang
Date: Fri, 8 Sep 2017 17:34:47 -0400
Subject: perf/x86/intel/cstate: Add missing CPU IDs

Skylake server uses the same C-state residency events as Sandy Bridge.
Denverton and Gemini Lake use the same C-state residency events as
Apollo Lake.

Signed-off-by: Kan Liang
Signed-off-by: Thomas Gleixner
Cc: ak@linux.intel.com
Cc: peterz@infradead.org
Cc: piotr.luc@intel.com
Cc: harry.pan@intel.com
Cc: srinivas.pandruvada@linux.intel.com
Link: http://lkml.kernel.org/r/20170908213449.6224-1-kan.liang@intel.com
---
 arch/x86/events/intel/cstate.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 4cf100ff2a37..72db0664a53d 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -552,6 +552,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates),
 	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
+	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates),
 	X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, snb_cstates),
 	X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates),
@@ -560,6 +561,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
 	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
+	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_DENVERTON, glm_cstates),
+
+	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GEMINI_LAKE, glm_cstates),
 	{ },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);

-- cgit

From 1aaccc40a1864053da26605b0297be16dd52641e Mon Sep 17 00:00:00 2001
From: Kan Liang
Date: Fri, 8 Sep 2017 17:34:48 -0400
Subject: perf/x86/msr: Add missing CPU IDs

Goldmont, Goldmont Plus and Xeon Phi have MSR_SMI_COUNT as well.
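Once a model is listed in test_intel(), its SMI count becomes readable
through the msr PMU, e.g. (usage sketch, assuming the event keeps its
usual msr/smi/ name):

	# perf stat -e msr/smi/ -a -- sleep 1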
Signed-off-by: Kan Liang
Signed-off-by: Thomas Gleixner
Cc: ak@linux.intel.com
Cc: peterz@infradead.org
Cc: piotr.luc@intel.com
Cc: harry.pan@intel.com
Cc: srinivas.pandruvada@linux.intel.com
Link: http://lkml.kernel.org/r/20170908213449.6224-2-kan.liang@intel.com
---
 arch/x86/events/msr.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 4bb3ec69e8ea..06723671ae4e 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -63,6 +63,14 @@ static bool test_intel(int idx)
 	case INTEL_FAM6_ATOM_SILVERMONT1:
 	case INTEL_FAM6_ATOM_SILVERMONT2:
 	case INTEL_FAM6_ATOM_AIRMONT:
+
+	case INTEL_FAM6_ATOM_GOLDMONT:
+	case INTEL_FAM6_ATOM_DENVERTON:
+
+	case INTEL_FAM6_ATOM_GEMINI_LAKE:
+
+	case INTEL_FAM6_XEON_PHI_KNL:
+	case INTEL_FAM6_XEON_PHI_KNM:
 		if (idx == PERF_MSR_SMI)
 			return true;
 		break;

-- cgit

From 450a97893559354b927c935f39ee11126f01f520 Mon Sep 17 00:00:00 2001
From: Kan Liang
Date: Fri, 8 Sep 2017 17:34:49 -0400
Subject: perf/x86/intel/rapl: Add missing CPU IDs

DENVERTON and GEMINI_LAKE support the same RAPL counters as Apollo Lake.

Signed-off-by: Kan Liang
Signed-off-by: Thomas Gleixner
Cc: ak@linux.intel.com
Cc: peterz@infradead.org
Cc: piotr.luc@intel.com
Cc: harry.pan@intel.com
Cc: srinivas.pandruvada@linux.intel.com
Link: http://lkml.kernel.org/r/20170908213449.6224-3-kan.liang@intel.com
---
 arch/x86/events/intel/rapl.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 8e2457cb6b4a..005908ee9333 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -775,6 +775,9 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init),
+
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GEMINI_LAKE, hsw_rapl_init),
 	{},
 };

-- cgit

From 29b46dfb136cdbeece542b3f01115237e43f2855 Mon Sep 17 00:00:00 2001
From: Kan Liang
Date: Mon, 11 Sep 2017 10:10:15 -0700
Subject: perf/x86/intel/uncore: Correct num_boxes for IIO and IRP

There are 6 IIO/IRP boxes, serving CBDMA, PCIe0-2, MCP 0 and MCP 1
respectively. Correct the num_boxes.
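With num_boxes corrected, all six boxes should be enumerated in sysfs
on a Skylake server (illustrative listing; names assume the usual
uncore PMU naming):

	$ ls /sys/bus/event_source/devices/ | grep uncore_iio
	uncore_iio_0
	uncore_iio_1
	uncore_iio_2
	uncore_iio_3
	uncore_iio_4
	uncore_iio_5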
Signed-off-by: Kan Liang
Signed-off-by: Thomas Gleixner
Cc: ak@linux.intel.com
Cc: peterz@infradead.org
Cc: eranian@google.com
Cc: acme@kernel.org
Link: http://lkml.kernel.org/r/1505149816-12580-1-git-send-email-kan.liang@intel.com
---
 arch/x86/events/intel/uncore_snbep.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index db1fe377e6dd..a7196818416a 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3462,7 +3462,7 @@ static struct intel_uncore_ops skx_uncore_iio_ops = {
 static struct intel_uncore_type skx_uncore_iio = {
 	.name			= "iio",
 	.num_counters		= 4,
-	.num_boxes		= 5,
+	.num_boxes		= 6,
 	.perf_ctr_bits		= 48,
 	.event_ctl		= SKX_IIO0_MSR_PMON_CTL0,
 	.perf_ctr		= SKX_IIO0_MSR_PMON_CTR0,
@@ -3492,7 +3492,7 @@ static const struct attribute_group skx_uncore_format_group = {
 static struct intel_uncore_type skx_uncore_irp = {
 	.name			= "irp",
 	.num_counters		= 2,
-	.num_boxes		= 5,
+	.num_boxes		= 6,
 	.perf_ctr_bits		= 48,
 	.event_ctl		= SKX_IRP0_MSR_PMON_CTL0,
 	.perf_ctr		= SKX_IRP0_MSR_PMON_CTR0,

-- cgit

From 0add53713b1c07a1c71e27a20e21eb7c180b4e7b Mon Sep 17 00:00:00 2001
From: Michal Simek
Date: Tue, 19 Sep 2017 16:40:21 +0200
Subject: microblaze: Add missing kvm_para.h to Kbuild

Running 'make allmodconfig; make' throws a compilation error:

  CC      kernel/watchdog.o
In file included from ./include/linux/kvm_para.h:4:0,
                 from kernel/watchdog.c:29:
./include/uapi/linux/kvm_para.h:32:26: fatal error: asm/kvm_para.h: No such file or directory
 #include <asm/kvm_para.h>
                          ^
compilation terminated.
make[1]: *** [kernel/watchdog.o] Error 1
make: *** [kernel/watchdog.o] Error 2

Reported-by: Michal Hocko
Suggested-by: Geert Uytterhoeven
Signed-off-by: Michal Simek
Fixes: 83f0124ad81e87b ("microblaze: remove asm-generic wrapper headers")
Reviewed-by: Tobias Klauser
Tested-by: Michal Hocko
---
 arch/microblaze/include/uapi/asm/Kbuild | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
index e77a596f3f1e..06609ca36115 100644
--- a/arch/microblaze/include/uapi/asm/Kbuild
+++ b/arch/microblaze/include/uapi/asm/Kbuild
@@ -7,6 +7,7 @@ generic-y += fcntl.h
 generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
+generic-y += kvm_para.h
 generic-y += mman.h
 generic-y += msgbuf.h
 generic-y += param.h

-- cgit

From 64c99853baca40e2f06038c4a926009edd14c7c3 Mon Sep 17 00:00:00 2001
From: Thomas Meyer
Date: Thu, 21 Sep 2017 00:29:36 +0200
Subject: microblaze: Cocci spatch "vma_pages"

Use the vma_pages() helper on the vma object instead of the explicit
computation.
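For reference, vma_pages() is the existing helper in include/linux/mm.h
that wraps exactly this computation (quoted from memory):

	static inline unsigned long vma_pages(struct vm_area_struct *vma)
	{
		return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
	}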
Found by coccinelle spatch "api/vma_pages.cocci"

Signed-off-by: Thomas Meyer
Signed-off-by: Michal Simek
---
 arch/microblaze/kernel/dma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index e45ada8fb006..94700c5270a9 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -165,7 +165,7 @@ int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
 			     unsigned long attrs)
 {
 #ifdef CONFIG_MMU
-	unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long user_count = vma_pages(vma);
 	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	unsigned long off = vma->vm_pgoff;
 	unsigned long pfn;

-- cgit

From 428dbf156cc5a8f9994d1f1a5c79373d15476f3c Mon Sep 17 00:00:00 2001
From: Babu Moger
Date: Mon, 18 Sep 2017 10:53:29 -0600
Subject: arch: change default endian for microblaze

Fix the default for microblaze. Michal Simek pointed out that the
default for microblaze should be CPU_LITTLE_ENDIAN.

Fixes: 206d3642d8ee ("arch/microblaze: add choice for endianness and update Makefile")
Signed-off-by: Babu Moger
Cc: Michal Simek
Signed-off-by: Michal Simek
---
 arch/microblaze/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 9d26abdf0dc1..4f798aa671dd 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -39,7 +39,7 @@ config MICROBLAZE
 # Endianness selection
 choice
 	prompt "Endianness selection"
-	default CPU_BIG_ENDIAN
+	default CPU_LITTLE_ENDIAN
 	help
 	  microblaze architectures can be configured for either little or
 	  big endian formats. Be sure to select the appropriate mode.

-- cgit

From 4618e90965f272fe522f2af2523a60d0d4bc78f3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Sat, 23 Sep 2017 15:00:10 +0200
Subject: x86/fpu: Fix fpu__activate_fpstate_read() and update comments

fpu__activate_fpstate_read() can be called for the current task when
coredumping - or for stopped tasks when ptrace-ing. Implement this
properly in the code and update the comments.

This also fixes an incorrect (but harmless) warning introduced by one
of the earlier patches.

Cc: Andrew Morton
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Dave Hansen
Cc: Eric Biggers
Cc: Fenghua Yu
Cc: Linus Torvalds
Cc: Oleg Nesterov
Cc: Peter Zijlstra
Cc: Rik van Riel
Cc: Thomas Gleixner
Cc: Yu-cheng Yu
Link: http://lkml.kernel.org/r/20170923130016.21448-28-mingo@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/fpu/core.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 93103a909c47..afd3f2a5c64e 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -254,18 +254,21 @@ EXPORT_SYMBOL_GPL(fpu__activate_curr);
 /*
  * This function must be called before we read a task's fpstate.
  *
- * If the task has not used the FPU before then initialize its
- * fpstate.
+ * There's two cases where this gets called:
+ *
+ * - for the current task (when coredumping), in which case we have
+ *   to save the latest FPU registers into the fpstate,
+ *
+ * - or it's called for stopped tasks (ptrace), in which case the
+ *   registers were already saved by the context-switch code when
+ *   the task scheduled out - we only have to initialize the registers
+ *   if they've never been initialized.
  *
  * If the task has used the FPU before then save it.
*/ void fpu__activate_fpstate_read(struct fpu *fpu) { - /* - * If fpregs are active (in the current CPU), then - * copy them to the fpstate: - */ - if (fpu->fpstate_active) { + if (fpu == ¤t->thread.fpu) { fpu__save(fpu); } else { if (!fpu->fpstate_active) { -- cgit From 685c930d6e58e31e251ec354f9dca3958a4c5040 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 15:00:11 +0200 Subject: x86/fpu: Remove fpu__current_fpstate_write_begin/end() These functions are not used anymore, so remove them. Cc: Andrew Morton Cc: Andy Lutomirski Cc: Bobby Powers Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-29-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/internal.h | 2 -- arch/x86/kernel/fpu/core.c | 63 ------------------------------------- 2 files changed, 65 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index cf290d424e48..508e4181c4af 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -26,8 +26,6 @@ extern void fpu__activate_curr(struct fpu *fpu); extern void fpu__activate_fpstate_read(struct fpu *fpu); extern void fpu__activate_fpstate_write(struct fpu *fpu); -extern void fpu__current_fpstate_write_begin(void); -extern void fpu__current_fpstate_write_end(void); extern void fpu__save(struct fpu *fpu); extern void fpu__restore(struct fpu *fpu); extern int fpu__restore_sig(void __user *buf, int ia32_frame); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index afd3f2a5c64e..b2cdeb3b1860 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -316,69 +316,6 @@ void fpu__activate_fpstate_write(struct fpu *fpu) } } -/* - * This function must be called before we write the current - * task's fpstate. - * - * This call gets the current FPU register state and moves - * it in to the 'fpstate'. Preemption is disabled so that - * no writes to the 'fpstate' can occur from context - * swiches. - * - * Must be followed by a fpu__current_fpstate_write_end(). - */ -void fpu__current_fpstate_write_begin(void) -{ - struct fpu *fpu = ¤t->thread.fpu; - - /* - * Ensure that the context-switching code does not write - * over the fpstate while we are doing our update. - */ - preempt_disable(); - - /* - * Move the fpregs in to the fpu's 'fpstate'. - */ - fpu__activate_fpstate_read(fpu); - - /* - * The caller is about to write to 'fpu'. Ensure that no - * CPU thinks that its fpregs match the fpstate. This - * ensures we will not be lazy and skip a XRSTOR in the - * future. - */ - __fpu_invalidate_fpregs_state(fpu); -} - -/* - * This function must be paired with fpu__current_fpstate_write_begin() - * - * This will ensure that the modified fpstate gets placed back in - * the fpregs if necessary. - * - * Note: This function may be called whether or not an _actual_ - * write to the fpstate occurred. - */ -void fpu__current_fpstate_write_end(void) -{ - struct fpu *fpu = ¤t->thread.fpu; - - /* - * 'fpu' now has an updated copy of the state, but the - * registers may still be out of date. Update them with - * an XRSTOR if they are active. - */ - if (fpu->fpstate_active) - copy_kernel_to_fpregs(&fpu->state); - - /* - * Our update is done and the fpregs/fpstate are in sync - * if necessary. Context switches can happen again. 
- */ - preempt_enable(); -} - /* * 'fpu__restore()' is called to copy FPU registers from * the FPU fpstate to the live hw registers and to activate -- cgit From e4a81bfcaae1ebbdc6efe74e8ea563144d90e9a9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 26 Sep 2017 09:43:36 +0200 Subject: x86/fpu: Rename fpu::fpstate_active to fpu::initialized The x86 FPU code used to have a complex state machine where both the FPU registers and the FPU state context could be 'active' (or inactive) independently of each other - which enabled features like lazy FPU restore. Much of this complexity is gone in the current code: now we basically can have FPU-less tasks (kernel threads) that don't use (and save/restore) FPU state at all, plus full FPU users that save/restore directly with no laziness whatsoever. But the fpu::fpstate_active still carries bits of the old complexity - meanwhile this flag has become a simple flag that shows whether the FPU context saving area in the thread struct is initialized and used, or not. Rename it to fpu::initialized to express this simplicity in the name as well. Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-30-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32_signal.c | 2 +- arch/x86/include/asm/fpu/internal.h | 4 ++-- arch/x86/include/asm/fpu/types.h | 6 +++--- arch/x86/include/asm/trace/fpu.h | 8 ++++---- arch/x86/kernel/fpu/core.c | 24 ++++++++++++------------ arch/x86/kernel/fpu/init.c | 2 +- arch/x86/kernel/fpu/regset.c | 6 +++--- arch/x86/kernel/fpu/signal.c | 8 ++++---- arch/x86/kernel/fpu/xstate.c | 2 +- arch/x86/kernel/signal.c | 6 +++--- arch/x86/mm/pkeys.c | 2 +- 11 files changed, 35 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index e0bb46c02857..0e2a5edbce00 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -231,7 +231,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, ksig->ka.sa.sa_restorer) sp = (unsigned long) ksig->ka.sa.sa_restorer; - if (fpu->fpstate_active) { + if (fpu->initialized) { unsigned long fx_aligned, math_size; sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size); diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 508e4181c4af..b26ae05da18a 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -527,7 +527,7 @@ static inline void fpregs_activate(struct fpu *fpu) static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - if (old_fpu->fpstate_active) { + if (old_fpu->initialized) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else @@ -550,7 +550,7 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu) static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu) { bool preload = static_cpu_has(X86_FEATURE_FPU) && - new_fpu->fpstate_active; + new_fpu->initialized; if (preload) { if (!fpregs_state_valid(new_fpu, cpu)) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 71db45ca8870..a1520575d86b 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -293,13 +293,13 @@ struct fpu { unsigned int last_cpu; /* - * @fpstate_active: + * @initialized: * - * This flag indicates whether this context is 
active: if the task + * This flag indicates whether this context is initialized: if the task * is not running then we can restore from this context, if the task * is running then we should save into this context. */ - unsigned char fpstate_active; + unsigned char initialized; /* * @state: diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h index da565aae9fd2..39f7a27bef13 100644 --- a/arch/x86/include/asm/trace/fpu.h +++ b/arch/x86/include/asm/trace/fpu.h @@ -12,22 +12,22 @@ DECLARE_EVENT_CLASS(x86_fpu, TP_STRUCT__entry( __field(struct fpu *, fpu) - __field(bool, fpstate_active) + __field(bool, initialized) __field(u64, xfeatures) __field(u64, xcomp_bv) ), TP_fast_assign( __entry->fpu = fpu; - __entry->fpstate_active = fpu->fpstate_active; + __entry->initialized = fpu->initialized; if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { __entry->xfeatures = fpu->state.xsave.header.xfeatures; __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; } ), - TP_printk("x86/fpu: %p fpstate_active: %d xfeatures: %llx xcomp_bv: %llx", + TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx", __entry->fpu, - __entry->fpstate_active, + __entry->initialized, __entry->xfeatures, __entry->xcomp_bv ) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index b2cdeb3b1860..c8d6032f04d0 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -100,7 +100,7 @@ void __kernel_fpu_begin(void) kernel_fpu_disable(); - if (fpu->fpstate_active) { + if (fpu->initialized) { /* * Ignore return value -- we don't care if reg state * is clobbered. @@ -116,7 +116,7 @@ void __kernel_fpu_end(void) { struct fpu *fpu = ¤t->thread.fpu; - if (fpu->fpstate_active) + if (fpu->initialized) copy_kernel_to_fpregs(&fpu->state); kernel_fpu_enable(); @@ -148,7 +148,7 @@ void fpu__save(struct fpu *fpu) preempt_disable(); trace_x86_fpu_before_save(fpu); - if (fpu->fpstate_active) { + if (fpu->initialized) { if (!copy_fpregs_to_fpstate(fpu)) { copy_kernel_to_fpregs(&fpu->state); } @@ -191,7 +191,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) { dst_fpu->last_cpu = -1; - if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU)) + if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU)) return 0; WARN_ON_FPU(src_fpu != ¤t->thread.fpu); @@ -240,13 +240,13 @@ void fpu__activate_curr(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); - if (!fpu->fpstate_active) { + if (!fpu->initialized) { fpstate_init(&fpu->state); trace_x86_fpu_init_state(fpu); trace_x86_fpu_activate_state(fpu); /* Safe to do for the current task: */ - fpu->fpstate_active = 1; + fpu->initialized = 1; } } EXPORT_SYMBOL_GPL(fpu__activate_curr); @@ -271,13 +271,13 @@ void fpu__activate_fpstate_read(struct fpu *fpu) if (fpu == ¤t->thread.fpu) { fpu__save(fpu); } else { - if (!fpu->fpstate_active) { + if (!fpu->initialized) { fpstate_init(&fpu->state); trace_x86_fpu_init_state(fpu); trace_x86_fpu_activate_state(fpu); /* Safe to do for current and for stopped child tasks: */ - fpu->fpstate_active = 1; + fpu->initialized = 1; } } } @@ -303,7 +303,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu) */ WARN_ON_FPU(fpu == ¤t->thread.fpu); - if (fpu->fpstate_active) { + if (fpu->initialized) { /* Invalidate any lazy state: */ __fpu_invalidate_fpregs_state(fpu); } else { @@ -312,7 +312,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu) trace_x86_fpu_activate_state(fpu); /* Safe to do for stopped child tasks: */ - fpu->fpstate_active = 1; + fpu->initialized = 1; } } @@ -354,7 +354,7 @@ 
void fpu__drop(struct fpu *fpu) preempt_disable(); if (fpu == ¤t->thread.fpu) { - if (fpu->fpstate_active) { + if (fpu->initialized) { /* Ignore delayed exceptions from user space */ asm volatile("1: fwait\n" "2:\n" @@ -363,7 +363,7 @@ void fpu__drop(struct fpu *fpu) } } - fpu->fpstate_active = 0; + fpu->initialized = 0; trace_x86_fpu_dropped(fpu); diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index d5d44c452624..7affb7e3d9a5 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -240,7 +240,7 @@ static void __init fpu__init_system_ctx_switch(void) WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; - WARN_ON_FPU(current->thread.fpu.fpstate_active); + WARN_ON_FPU(current->thread.fpu.initialized); } /* diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index c764f7405322..19e82334e811 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -16,14 +16,14 @@ int regset_fpregs_active(struct task_struct *target, const struct user_regset *r { struct fpu *target_fpu = &target->thread.fpu; - return target_fpu->fpstate_active ? regset->n : 0; + return target_fpu->initialized ? regset->n : 0; } int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset) { struct fpu *target_fpu = &target->thread.fpu; - if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active) + if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized) return regset->n; else return 0; @@ -380,7 +380,7 @@ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu) struct fpu *fpu = &tsk->thread.fpu; int fpvalid; - fpvalid = fpu->fpstate_active; + fpvalid = fpu->initialized; if (fpvalid) fpvalid = !fpregs_get(tsk, NULL, 0, sizeof(struct user_i387_ia32_struct), diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index da68ea1c3a44..ab2dd24cfea4 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -171,7 +171,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) sizeof(struct user_i387_ia32_struct), NULL, (struct _fpstate_32 __user *) buf) ? -1 : 1; - if (fpu->fpstate_active || using_compacted_format()) { + if (fpu->initialized || using_compacted_format()) { /* Save the live register state to the user directly. */ if (copy_fpregs_to_sigframe(buf_fx)) return -1; @@ -315,12 +315,12 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) int err = 0; /* - * Drop the current fpu which clears fpu->fpstate_active. This ensures + * Drop the current fpu which clears fpu->initialized. This ensures * that any context-switch during the copy of the new state, * avoids the intermediate state from getting restored/saved. * Thus avoiding the new restored state from getting corrupted. * We will be ready to restore/save the state only after - * fpu->fpstate_active is again set. + * fpu->initialized is again set. 
*/ fpu__drop(fpu); @@ -342,7 +342,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); } - fpu->fpstate_active = 1; + fpu->initialized = 1; preempt_disable(); fpu__restore(fpu); preempt_enable(); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index fda1109cc355..703e76d027ee 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -867,7 +867,7 @@ const void *get_xsave_field_ptr(int xsave_state) { struct fpu *fpu = ¤t->thread.fpu; - if (!fpu->fpstate_active) + if (!fpu->initialized) return NULL; /* * fpu__save() takes the CPU's xstate registers diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index e04442345fc0..4e188fda5961 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -263,7 +263,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, sp = (unsigned long) ka->sa.sa_restorer; } - if (fpu->fpstate_active) { + if (fpu->initialized) { sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32), &buf_fx, &math_size); *fpstate = (void __user *)sp; @@ -279,7 +279,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, return (void __user *)-1L; /* save i387 and extended state */ - if (fpu->fpstate_active && + if (fpu->initialized && copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0) return (void __user *)-1L; @@ -755,7 +755,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) /* * Ensure the signal handler starts with the new fpu state. */ - if (fpu->fpstate_active) + if (fpu->initialized) fpu__clear(fpu); } signal_setup_done(failed, ksig, stepping); diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index 4d24269c071f..d7bc0eea20a5 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -44,7 +44,7 @@ int __execute_only_pkey(struct mm_struct *mm) */ preempt_disable(); if (!need_to_set_mm_pkey && - current->thread.fpu.fpstate_active && + current->thread.fpu.initialized && !__pkru_allows_read(read_pkru(), execute_only_pkey)) { preempt_enable(); return execute_only_pkey; -- cgit From 7f1487c59b7c6dcb20155f4302985da2659a2997 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 15:00:13 +0200 Subject: x86/fpu: Fix stale comments about lazy FPU logic We don't do any lazy restore anymore, what we have are two pieces of optimization: - no-FPU tasks that don't save/restore the FPU context (kernel threads are such) - cached FPU registers maintained via the fpu->last_cpu field. This means that if an FPU task context switches to a non-FPU task then we can maintain the FPU registers as an in-FPU copies (cache), and skip the restoration of them once we switch back to the original FPU-using task. Update all the comments that still referred to old 'lazy' and 'unlazy' concepts. 
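The second optimization boils down to a check like this on the
switch-in path (simplified sketch of the fpregs_state_valid() logic in
fpu/internal.h, not a verbatim quote):

	/*
	 * The FPU registers still match fpu->state if this CPU was the
	 * last one to load them and nothing has invalidated them since.
	 */
	static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
	{
		return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) &&
		       cpu == fpu->last_cpu;
	}

If the check fails, switch_fpu_finish() restores the registers from
fpu->state; if it holds, the restore is skipped entirely.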
Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-31-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/core.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index c8d6032f04d0..77668d91fdc1 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -205,9 +205,6 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) /* * Save current FPU registers directly into the child * FPU context, without any memory-to-memory copying. - * In lazy mode, if the FPU context isn't loaded into - * fpregs, CR0.TS will be set and do_device_not_available - * will load the FPU context. * * We have to do all this with preemption disabled, * mostly because of the FNSAVE case, because in that @@ -285,13 +282,13 @@ void fpu__activate_fpstate_read(struct fpu *fpu) /* * This function must be called before we write a task's fpstate. * - * If the task has used the FPU before then unlazy it. + * If the task has used the FPU before then invalidate any cached FPU registers. * If the task has not used the FPU before then initialize its fpstate. * * After this function call, after registers in the fpstate are * modified and the child task has woken up, the child task will * restore the modified FPU state from the modified context. If we - * didn't clear its lazy status here then the lazy in-registers + * didn't clear its cached status here then the cached in-registers * state pending on its former CPU could be restored, corrupting * the modifications. */ @@ -304,7 +301,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu) WARN_ON_FPU(fpu == ¤t->thread.fpu); if (fpu->initialized) { - /* Invalidate any lazy state: */ + /* Invalidate any cached state: */ __fpu_invalidate_fpregs_state(fpu); } else { fpstate_init(&fpu->state); -- cgit From e10078eba69859359ce8644dd423b4132a6a8913 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 15:00:14 +0200 Subject: x86/fpu: Simplify and speed up fpu__copy() fpu__copy() has a preempt_disable()/enable() pair, which it had to do to be able to atomically unlazy the current task when doing an FNSAVE. But we don't unlazy tasks anymore, we always do direct saves/restores of FPU context. So remove both the unnecessary critical section, and update the comments. Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-32-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/core.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 77668d91fdc1..52122dd418ae 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -206,22 +206,13 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) * Save current FPU registers directly into the child * FPU context, without any memory-to-memory copying. 
* - * We have to do all this with preemption disabled, - * mostly because of the FNSAVE case, because in that - * case we must not allow preemption in the window - * between the FNSAVE and us marking the context lazy. - * - * It shouldn't be an issue as even FNSAVE is plenty - * fast in terms of critical section length. + * ( The function 'fails' in the FNSAVE case, which destroys + * register contents so we have to copy them back. ) */ - preempt_disable(); if (!copy_fpregs_to_fpstate(dst_fpu)) { - memcpy(&src_fpu->state, &dst_fpu->state, - fpu_kernel_xstate_size); - + memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size); copy_kernel_to_fpregs(&src_fpu->state); } - preempt_enable(); trace_x86_fpu_copy_src(src_fpu); trace_x86_fpu_copy_dst(dst_fpu); -- cgit From 2ce03d850b9a2f17d55596ecfa86e72b5687a627 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 15:00:15 +0200 Subject: x86/fpu: Rename fpu__activate_curr() to fpu__initialize() Rename this function to better express that it's all about initializing the FPU state of a task which goes hand in hand with the fpu::initialized field. Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-33-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/internal.h | 2 +- arch/x86/kernel/fpu/core.c | 8 ++++---- arch/x86/kernel/fpu/signal.c | 2 +- arch/x86/kvm/x86.c | 2 +- arch/x86/math-emu/fpu_entry.c | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index b26ae05da18a..7c980aafb8aa 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -23,7 +23,7 @@ /* * High level FPU state handling functions: */ -extern void fpu__activate_curr(struct fpu *fpu); +extern void fpu__initialize(struct fpu *fpu); extern void fpu__activate_fpstate_read(struct fpu *fpu); extern void fpu__activate_fpstate_write(struct fpu *fpu); extern void fpu__save(struct fpu *fpu); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 52122dd418ae..07db9d94b68b 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -224,7 +224,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) * Activate the current task's in-memory FPU context, * if it has not been used before: */ -void fpu__activate_curr(struct fpu *fpu) +void fpu__initialize(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); @@ -237,7 +237,7 @@ void fpu__activate_curr(struct fpu *fpu) fpu->initialized = 1; } } -EXPORT_SYMBOL_GPL(fpu__activate_curr); +EXPORT_SYMBOL_GPL(fpu__initialize); /* * This function must be called before we read a task's fpstate. 
@@ -316,7 +316,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu) */ void fpu__restore(struct fpu *fpu) { - fpu__activate_curr(fpu); + fpu__initialize(fpu); /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ kernel_fpu_disable(); @@ -392,7 +392,7 @@ void fpu__clear(struct fpu *fpu) */ if (static_cpu_has(X86_FEATURE_FPU)) { preempt_disable(); - fpu__activate_curr(fpu); + fpu__initialize(fpu); user_fpu_begin(); copy_init_fpstate_to_fpregs(); preempt_enable(); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index ab2dd24cfea4..7fa3bdb331e9 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -280,7 +280,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) if (!access_ok(VERIFY_READ, buf, size)) return -EACCES; - fpu__activate_curr(fpu); + fpu__initialize(fpu); if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_set(current, NULL, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cd17b7d9a107..03869eb7fcd6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7225,7 +7225,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int r; sigset_t sigsaved; - fpu__activate_curr(fpu); + fpu__initialize(fpu); if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index d4a7df2205b8..220638a4cb94 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -114,7 +114,7 @@ void math_emulate(struct math_emu_info *info) struct desc_struct code_descriptor; struct fpu *fpu = ¤t->thread.fpu; - fpu__activate_curr(fpu); + fpu__initialize(fpu); #ifdef RE_ENTRANT_CHECKING if (emulating) { -- cgit From 369a036de206710ff27a66f9bffe78ef657648c3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 13:37:45 +0200 Subject: x86/fpu: Rename fpu__activate_fpstate_read/write() to fpu__prepare_[read|write]() As per the new nomenclature we don't 'activate' the FPU state anymore, we initialize it. So drop the _activate_fpstate name from these functions, which were a bit of a mouthful anyway, and name them: fpu__prepare_read() fpu__prepare_write() Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Eric Biggers Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/internal.h | 4 ++-- arch/x86/kernel/fpu/core.c | 4 ++-- arch/x86/kernel/fpu/regset.c | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 7c980aafb8aa..e3221ffa304e 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -24,8 +24,8 @@ * High level FPU state handling functions: */ extern void fpu__initialize(struct fpu *fpu); -extern void fpu__activate_fpstate_read(struct fpu *fpu); -extern void fpu__activate_fpstate_write(struct fpu *fpu); +extern void fpu__prepare_read(struct fpu *fpu); +extern void fpu__prepare_write(struct fpu *fpu); extern void fpu__save(struct fpu *fpu); extern void fpu__restore(struct fpu *fpu); extern int fpu__restore_sig(void __user *buf, int ia32_frame); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 07db9d94b68b..f92a6593de1e 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -254,7 +254,7 @@ EXPORT_SYMBOL_GPL(fpu__initialize); * * If the task has used the FPU before then save it. */ -void fpu__activate_fpstate_read(struct fpu *fpu) +void fpu__prepare_read(struct fpu *fpu) { if (fpu == ¤t->thread.fpu) { fpu__save(fpu); @@ -283,7 +283,7 @@ void fpu__activate_fpstate_read(struct fpu *fpu) * state pending on its former CPU could be restored, corrupting * the modifications. */ -void fpu__activate_fpstate_write(struct fpu *fpu) +void fpu__prepare_write(struct fpu *fpu) { /* * Only stopped child tasks can be used to modify the FPU diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 19e82334e811..ee8d2f049818 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -38,7 +38,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, if (!boot_cpu_has(X86_FEATURE_FXSR)) return -ENODEV; - fpu__activate_fpstate_read(fpu); + fpu__prepare_read(fpu); fpstate_sanitize_xstate(fpu); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, @@ -55,7 +55,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, if (!boot_cpu_has(X86_FEATURE_FXSR)) return -ENODEV; - fpu__activate_fpstate_write(fpu); + fpu__prepare_write(fpu); fpstate_sanitize_xstate(fpu); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, @@ -89,7 +89,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, xsave = &fpu->state.xsave; - fpu__activate_fpstate_read(fpu); + fpu__prepare_read(fpu); if (using_compacted_format()) { if (kbuf) @@ -132,7 +132,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, xsave = &fpu->state.xsave; - fpu__activate_fpstate_write(fpu); + fpu__prepare_write(fpu); if (boot_cpu_has(X86_FEATURE_XSAVES)) { if (kbuf) @@ -310,7 +310,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, struct fpu *fpu = &target->thread.fpu; struct user_i387_ia32_struct env; - fpu__activate_fpstate_read(fpu); + fpu__prepare_read(fpu); if (!boot_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); @@ -340,7 +340,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, struct user_i387_ia32_struct env; int ret; - fpu__activate_fpstate_write(fpu); + fpu__prepare_write(fpu); fpstate_sanitize_xstate(fpu); if 
(!boot_cpu_has(X86_FEATURE_FPU)) -- cgit From e63e5d5c15c6b1dba26f7cbd1b1089a1d6155db5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 24 Sep 2017 12:59:04 +0200 Subject: x86/fpu: Introduce validate_xstate_header() Move validation of user-supplied xstate_header into a helper function, in preparation of calling it from both the ptrace and sigreturn syscall paths. The new function also considers it to be an error if *any* reserved bits are set, whereas before we were just clearing most of them silently. This should reduce the chance of bugs that fail to correctly validate user-supplied XSAVE areas. It also will expose any broken userspace programs that set the other reserved bits; this is desirable because such programs will lose compatibility with future CPUs and kernels if those bits are ever used for anything. (There shouldn't be any such programs, and in fact in the case where the compacted format is in use we were already validating xfeatures. But you never know...) Signed-off-by: Eric Biggers Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Dmitry Vyukov Cc: Eric Biggers Cc: Fenghua Yu Cc: Kees Cook Cc: Kevin Hao Cc: Linus Torvalds Cc: Michael Halcrow Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Wanpeng Li Cc: Yu-cheng Yu Cc: kernel-hardening@lists.openwall.com Link: http://lkml.kernel.org/r/20170924105913.9157-2-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/xstate.h | 4 ++++ arch/x86/kernel/fpu/xstate.c | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 579ac2358e63..83fee2469eb7 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -52,4 +52,8 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); + +/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ +extern int validate_xstate_header(const struct xstate_header *hdr); + #endif diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 703e76d027ee..2427aeea33b5 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -483,6 +483,30 @@ int using_compacted_format(void) return boot_cpu_has(X86_FEATURE_XSAVES); } +/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ +int validate_xstate_header(const struct xstate_header *hdr) +{ + /* No unknown or supervisor features may be set */ + if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR)) + return -EINVAL; + + /* Userspace must use the uncompacted format */ + if (hdr->xcomp_bv) + return -EINVAL; + + /* + * If 'reserved' is shrunken to add a new field, make sure to validate + * that new field here! 
+ */ + BUILD_BUG_ON(sizeof(hdr->reserved) != 48); + + /* No reserved bits may be set */ + if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved))) + return -EINVAL; + + return 0; +} + static void __xstate_dump_leaves(void) { int i; -- cgit From cf9df81b139b6ebaec188d73758f02ca3b2110e4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 24 Sep 2017 12:59:05 +0200 Subject: x86/fpu: Use validate_xstate_header() to validate the xstate_header in xstateregs_set() Tighten the checks in xstateregs_set(). Signed-off-by: Eric Biggers Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Dmitry Vyukov Cc: Eric Biggers Cc: Fenghua Yu Cc: Kees Cook Cc: Kevin Hao Cc: Linus Torvalds Cc: Michael Halcrow Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Wanpeng Li Cc: Yu-cheng Yu Cc: kernel-hardening@lists.openwall.com Link: http://lkml.kernel.org/r/20170924105913.9157-3-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/regset.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index ee8d2f049818..b831d5b9de99 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -141,27 +141,20 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, ret = copy_user_to_xstate(xsave, ubuf); } else { ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); - - /* xcomp_bv must be 0 when using uncompacted format */ - if (!ret && xsave->header.xcomp_bv) - ret = -EINVAL; + if (!ret) + ret = validate_xstate_header(&xsave->header); } - /* - * In case of failure, mark all states as init: - */ - if (ret) - fpstate_init(&fpu->state); - /* * mxcsr reserved bits must be masked to zero for security reasons. */ xsave->i387.mxcsr &= mxcsr_feature_mask; - xsave->header.xfeatures &= xfeatures_mask; + /* - * These bits must be zero. + * In case of failure, mark all states as init: */ - memset(&xsave->header.reserved, 0, 48); + if (ret) + fpstate_init(&fpu->state); return ret; } -- cgit From b11e2e18a7fc8eaa3d592c260d50c7129e094ded Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 24 Sep 2017 12:59:06 +0200 Subject: x86/fpu: Use validate_xstate_header() to validate the xstate_header in __fpu__restore_sig() Tighten the checks in __fpu__restore_sig() and update comments. Signed-off-by: Eric Biggers Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Dmitry Vyukov Cc: Eric Biggers Cc: Fenghua Yu Cc: Kees Cook Cc: Kevin Hao Cc: Linus Torvalds Cc: Michael Halcrow Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Wanpeng Li Cc: Yu-cheng Yu Cc: kernel-hardening@lists.openwall.com Link: http://lkml.kernel.org/r/20170924105913.9157-4-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/signal.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 7fa3bdb331e9..fb639e70048f 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -214,8 +214,11 @@ sanitize_restored_xstate(struct task_struct *tsk, struct xstate_header *header = &xsave->header; if (use_xsave()) { - /* These bits must be zero. 
*/ - memset(header->reserved, 0, 48); + /* + * Note: we don't need to zero the reserved bits in the + * xstate_header here because we either didn't copy them at all, + * or we checked earlier that they aren't set. + */ /* * Init the state that is not present in the memory @@ -224,7 +227,7 @@ sanitize_restored_xstate(struct task_struct *tsk, if (fx_only) header->xfeatures = XFEATURE_MASK_FPSSE; else - header->xfeatures &= (xfeatures_mask & xfeatures); + header->xfeatures &= xfeatures; } if (use_fxsr()) { @@ -308,7 +311,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) /* * For 32-bit frames with fxstate, copy the user state to the * thread's fpu state, reconstruct fxstate from the fsave - * header. Sanitize the copied state etc. + * header. Validate and sanitize the copied state. */ struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; @@ -329,9 +332,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) } else { err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); - /* xcomp_bv must be 0 when using uncompacted format */ - if (!err && state_size > offsetof(struct xregs_state, header) && fpu->state.xsave.header.xcomp_bv) - err = -EINVAL; + if (!err && state_size > offsetof(struct xregs_state, header)) + err = validate_xstate_header(&fpu->state.xsave.header); } if (err || __copy_from_user(&env, buf, sizeof(env))) { -- cgit From 80d8ae86b36791a545ca28ddc95133ea59bba6e0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 24 Sep 2017 12:59:07 +0200 Subject: x86/fpu: Copy the full state_header in copy_kernel_to_xstate() This is in preparation to verify the full xstate header as supplied by user-space. Signed-off-by: Eric Biggers Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Dmitry Vyukov Cc: Eric Biggers Cc: Fenghua Yu Cc: Kees Cook Cc: Kevin Hao Cc: Linus Torvalds Cc: Michael Halcrow Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Wanpeng Li Cc: Yu-cheng Yu Cc: kernel-hardening@lists.openwall.com Link: http://lkml.kernel.org/r/20170924105913.9157-5-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 2427aeea33b5..02591b96bb25 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1148,11 +1148,13 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) int i; u64 xfeatures; u64 allowed_features; + struct xstate_header hdr; offset = offsetof(struct xregs_state, header); - size = sizeof(xfeatures); + size = sizeof(hdr); - memcpy(&xfeatures, kbuf + offset, size); + memcpy(&hdr, kbuf + offset, size); + xfeatures = hdr.xfeatures; /* * Reject if the user sets any disabled or supervisor features: -- cgit From b89eda482d7849a1c146b6d0a42f4e76369bb08e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 24 Sep 2017 12:59:08 +0200 Subject: x86/fpu: Eliminate the 'xfeatures' local variable in copy_kernel_to_xstate() We have this information in the xstate_header. 
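For context, the header being read is tiny (sketch of struct
xstate_header from fpu/types.h, quoted from memory):

	struct xstate_header {
		u64	xfeatures;
		u64	xcomp_bv;
		u64	reserved[6];
	} __attribute__((packed));

so keeping the whole hdr around instead of a separate 'xfeatures' copy
costs nothing and keeps the validation and the copy in sync.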
Signed-off-by: Eric Biggers Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Dmitry Vyukov Cc: Eric Biggers Cc: Fenghua Yu Cc: Kees Cook Cc: Kevin Hao Cc: Linus Torvalds Cc: Michael Halcrow Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Wanpeng Li Cc: Yu-cheng Yu Cc: kernel-hardening@lists.openwall.com Link: http://lkml.kernel.org/r/20170924105913.9157-6-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 02591b96bb25..c97c4a9db52a 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1146,7 +1146,6 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) { unsigned int offset, size; int i; - u64 xfeatures; u64 allowed_features; struct xstate_header hdr; @@ -1154,20 +1153,19 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) size = sizeof(hdr); memcpy(&hdr, kbuf + offset, size); - xfeatures = hdr.xfeatures; /* * Reject if the user sets any disabled or supervisor features: */ allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR; - if (xfeatures & ~allowed_features) + if (hdr.xfeatures & ~allowed_features) return -EINVAL; for (i = 0; i < XFEATURE_MAX; i++) { u64 mask = ((u64)1 << i); - if (xfeatures & mask) { + if (hdr.xfeatures & mask) { void *dst = __raw_xsave_addr(xsave, 1 << i); offset = xstate_offsets[i]; @@ -1177,7 +1175,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) } } - if (xfeatures_mxcsr_quirk(xfeatures)) { + if (xfeatures_mxcsr_quirk(hdr.xfeatures)) { offset = offsetof(struct fxregs_state, mxcsr); size = MXCSR_AND_FLAGS_SIZE; memcpy(&xsave->i387.mxcsr, kbuf + offset, size); @@ -1192,7 +1190,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) /* * Add back in the features that came in from userspace: */ - xsave->header.xfeatures |= xfeatures; + xsave->header.xfeatures |= hdr.xfeatures; return 0; } -- cgit From af95774b3ca080b0e1e651c0fc7680f3444ddda7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 24 Sep 2017 12:59:09 +0200 Subject: x86/fpu: Use validate_xstate_header() to validate the xstate_header in copy_kernel_to_xstate() Tighten the checks in copy_kernel_to_xstate(). Signed-off-by: Eric Biggers Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Dmitry Vyukov Cc: Eric Biggers Cc: Fenghua Yu Cc: Kees Cook Cc: Kevin Hao Cc: Linus Torvalds Cc: Michael Halcrow Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Wanpeng Li Cc: Yu-cheng Yu Cc: kernel-hardening@lists.openwall.com Link: http://lkml.kernel.org/r/20170924105913.9157-7-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index c97c4a9db52a..325db7850335 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1138,15 +1138,12 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i /* * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format - * and copy to the target thread. This is called from xstateregs_set() and - * there we check the CPU has XSAVES and a whole standard-sized buffer - * exists. 
+ * and copy to the target thread. This is called from xstateregs_set(). */ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) { unsigned int offset, size; int i; - u64 allowed_features; struct xstate_header hdr; offset = offsetof(struct xregs_state, header); @@ -1154,12 +1151,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) memcpy(&hdr, kbuf + offset, size); - /* - * Reject if the user sets any disabled or supervisor features: - */ - allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR; - - if (hdr.xfeatures & ~allowed_features) + if (validate_xstate_header(&hdr)) return -EINVAL; for (i = 0; i < XFEATURE_MAX; i++) { -- cgit From af2c4322d986a08a6e793b74b83a62b325019c20 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 24 Sep 2017 12:59:10 +0200 Subject: x86/fpu: Copy the full header in copy_user_to_xstate() This is in preparation to verify the full xstate header as supplied by user-space. Signed-off-by: Eric Biggers Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Dmitry Vyukov Cc: Eric Biggers Cc: Fenghua Yu Cc: Kees Cook Cc: Kevin Hao Cc: Linus Torvalds Cc: Michael Halcrow Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Wanpeng Li Cc: Yu-cheng Yu Cc: kernel-hardening@lists.openwall.com Link: http://lkml.kernel.org/r/20170924105913.9157-8-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 325db7850335..0cd7b73c25e8 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1199,13 +1199,16 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) int i; u64 xfeatures; u64 allowed_features; + struct xstate_header hdr; offset = offsetof(struct xregs_state, header); - size = sizeof(xfeatures); + size = sizeof(hdr); - if (__copy_from_user(&xfeatures, ubuf + offset, size)) + if (__copy_from_user(&hdr, ubuf + offset, size)) return -EFAULT; + xfeatures = hdr.xfeatures; + /* * Reject if the user sets any disabled or supervisor features: */ -- cgit From 3d703477bcfe8bb57079d97198cf1e342fe1fef9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 24 Sep 2017 12:59:11 +0200 Subject: x86/fpu: Eliminate the 'xfeatures' local variable in copy_user_to_xstate() We now have this field in hdr.xfeatures. 
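Taken together, the checks that validate_xstate_header() centralizes look roughly like the sketch below. This is a simplified userspace rendition based only on the checks visible in this series (feature mask, xcomp_bv, reserved bits); the kernel's real function operates on its own types and masks:

#include <stdint.h>
#include <string.h>
#include <errno.h>

struct xstate_header_sketch {
	uint64_t xfeatures;
	uint64_t xcomp_bv;
	uint64_t reserved[6];
};

/* Stand-in for memchr_inv(): nonzero if any byte differs from c. */
static int any_byte_differs(const void *p, int c, size_t n)
{
	const unsigned char *b = p;
	size_t i;

	for (i = 0; i < n; i++)
		if (b[i] != (unsigned char)c)
			return 1;
	return 0;
}

static int validate_xstate_header_sketch(const struct xstate_header_sketch *hdr,
					 uint64_t allowed_features)
{
	/* Reject any disabled or supervisor features. */
	if (hdr->xfeatures & ~allowed_features)
		return -EINVAL;

	/* xcomp_bv must be 0 when using the uncompacted format. */
	if (hdr->xcomp_bv)
		return -EINVAL;

	/* No reserved bits may be set. */
	if (any_byte_differs(hdr->reserved, 0, sizeof(hdr->reserved)))
		return -EINVAL;

	return 0;
}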
Signed-off-by: Eric Biggers Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Dmitry Vyukov Cc: Eric Biggers Cc: Fenghua Yu Cc: Kees Cook Cc: Kevin Hao Cc: Linus Torvalds Cc: Michael Halcrow Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Wanpeng Li Cc: Yu-cheng Yu Cc: kernel-hardening@lists.openwall.com Link: http://lkml.kernel.org/r/20170924105913.9157-9-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 0cd7b73c25e8..b6d78b78b5c2 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1197,7 +1197,6 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) { unsigned int offset, size; int i; - u64 xfeatures; u64 allowed_features; struct xstate_header hdr; @@ -1207,20 +1206,18 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) if (__copy_from_user(&hdr, ubuf + offset, size)) return -EFAULT; - xfeatures = hdr.xfeatures; - /* * Reject if the user sets any disabled or supervisor features: */ allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR; - if (xfeatures & ~allowed_features) + if (hdr.xfeatures & ~allowed_features) return -EINVAL; for (i = 0; i < XFEATURE_MAX; i++) { u64 mask = ((u64)1 << i); - if (xfeatures & mask) { + if (hdr.xfeatures & mask) { void *dst = __raw_xsave_addr(xsave, 1 << i); offset = xstate_offsets[i]; @@ -1231,7 +1228,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) } } - if (xfeatures_mxcsr_quirk(xfeatures)) { + if (xfeatures_mxcsr_quirk(hdr.xfeatures)) { offset = offsetof(struct fxregs_state, mxcsr); size = MXCSR_AND_FLAGS_SIZE; if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size)) @@ -1247,7 +1244,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) /* * Add back in the features that came in from userspace: */ - xsave->header.xfeatures |= xfeatures; + xsave->header.xfeatures |= hdr.xfeatures; return 0; } -- cgit From 98c0fad9d60e8b2cd47e15b7bee7df343648f5bb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 24 Sep 2017 12:59:12 +0200 Subject: x86/fpu: Use validate_xstate_header() to validate the xstate_header in copy_user_to_xstate() Tighten the checks in copy_user_to_xstate(). Signed-off-by: Eric Biggers Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Dmitry Vyukov Cc: Eric Biggers Cc: Fenghua Yu Cc: Kees Cook Cc: Kevin Hao Cc: Linus Torvalds Cc: Michael Halcrow Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Wanpeng Li Cc: Yu-cheng Yu Cc: kernel-hardening@lists.openwall.com Link: http://lkml.kernel.org/r/20170924105913.9157-10-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index b6d78b78b5c2..f1d5476c9022 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1188,16 +1188,15 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) } /* - * Convert from a ptrace standard-format user-space buffer to kernel XSAVES format - * and copy to the target thread. 
This is called from xstateregs_set() and - * there we check the CPU has XSAVES and a whole standard-sized buffer - * exists. + * Convert from a ptrace or sigreturn standard-format user-space buffer to + * kernel XSAVES format and copy to the target thread. This is called from + * xstateregs_set(), as well as potentially from the sigreturn() and + * rt_sigreturn() system calls. */ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) { unsigned int offset, size; int i; - u64 allowed_features; struct xstate_header hdr; offset = offsetof(struct xregs_state, header); @@ -1206,12 +1205,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) if (__copy_from_user(&hdr, ubuf + offset, size)) return -EFAULT; - /* - * Reject if the user sets any disabled or supervisor features: - */ - allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR; - - if (hdr.xfeatures & ~allowed_features) + if (validate_xstate_header(&hdr)) return -EINVAL; for (i = 0; i < XFEATURE_MAX; i++) { -- cgit From 738f48cb5fdd5878d11934f1898aa2bcf1578289 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 24 Sep 2017 12:59:13 +0200 Subject: x86/fpu: Use using_compacted_format() instead of open-coded X86_FEATURE_XSAVES This is the canonical method to use. Signed-off-by: Eric Biggers Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Dmitry Vyukov Cc: Eric Biggers Cc: Fenghua Yu Cc: Kees Cook Cc: Kevin Hao Cc: Linus Torvalds Cc: Michael Halcrow Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Wanpeng Li Cc: Yu-cheng Yu Cc: kernel-hardening@lists.openwall.com Link: http://lkml.kernel.org/r/20170924105913.9157-11-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/regset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index b831d5b9de99..3ea151372389 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -134,7 +134,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, fpu__prepare_write(fpu); - if (boot_cpu_has(X86_FEATURE_XSAVES)) { + if (using_compacted_format()) { if (kbuf) ret = copy_kernel_to_xstate(xsave, kbuf); else -- cgit From 5371513fb338fb9989c569dc071326d369d6ade8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 26 Sep 2017 15:57:16 +0100 Subject: arm64: Make sure SPsel is always set When the kernel is entered at EL2 on an ARMv8.0 system, we construct the EL1 pstate and make sure this uses the EL1 stack pointer (we perform an exception return to EL1h). But if the kernel is either entered at EL1 or stays at EL2 (because we're on a VHE-capable system), we fail to set SPsel, and use whatever stack selection the higher exception level has chosen for us. Let's not take any chances, and make sure that SPsel is set to one before we decide the mode we're going to run in. Cc: Acked-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/head.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 7434ec0c7a27..0b243ecaf7ac 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -384,6 +384,7 @@ ENTRY(kimage_vaddr) * booted in EL1 or EL2 respectively.
*/ ENTRY(el2_setup) + msr SPsel, #1 // We want to use SP_EL{1,2} mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 b.eq 1f -- cgit From cd39e1176d320157831ce030b4c869bd2d5eb142 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 6 Jun 2017 12:57:04 +0200 Subject: KVM: VMX: extract __pi_post_block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simple code movement patch, preparing for the next one. Cc: Huangweidong Cc: Gonglei Cc: wangxin Cc: Radim Krčmář Tested-by: Longpeng (Mike) Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 71 +++++++++++++++++++++++++++++------------------------- 1 file changed, 38 insertions(+), 33 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c83d28b0ab05..0002b14307ab 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11705,6 +11705,43 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); } +static void __pi_post_block(struct kvm_vcpu *vcpu) +{ + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + struct pi_desc old, new; + unsigned int dest; + unsigned long flags; + + do { + old.control = new.control = pi_desc->control; + + dest = cpu_physical_id(vcpu->cpu); + + if (x2apic_enabled()) + new.ndst = dest; + else + new.ndst = (dest << 8) & 0xFF00; + + /* Allow posting non-urgent interrupts */ + new.sn = 0; + + /* set 'NV' to 'notification vector' */ + new.nv = POSTED_INTR_VECTOR; + } while (cmpxchg(&pi_desc->control, old.control, + new.control) != old.control); + + if(vcpu->pre_pcpu != -1) { + spin_lock_irqsave( + &per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + list_del(&vcpu->blocked_vcpu_list); + spin_unlock_irqrestore( + &per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + vcpu->pre_pcpu = -1; + } +} + /* * This routine does the following things for vCPU which is going * to be blocked if VT-d PI is enabled. @@ -11798,44 +11835,12 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu) static void pi_post_block(struct kvm_vcpu *vcpu) { - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - struct pi_desc old, new; - unsigned int dest; - unsigned long flags; - if (!kvm_arch_has_assigned_device(vcpu->kvm) || !irq_remapping_cap(IRQ_POSTING_CAP) || !kvm_vcpu_apicv_active(vcpu)) return; - do { - old.control = new.control = pi_desc->control; - - dest = cpu_physical_id(vcpu->cpu); - - if (x2apic_enabled()) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; - - /* Allow posting non-urgent interrupts */ - new.sn = 0; - - /* set 'NV' to 'notification vector' */ - new.nv = POSTED_INTR_VECTOR; - } while (cmpxchg(&pi_desc->control, old.control, - new.control) != old.control); - - if(vcpu->pre_pcpu != -1) { - spin_lock_irqsave( - &per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); - list_del(&vcpu->blocked_vcpu_list); - spin_unlock_irqrestore( - &per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); - vcpu->pre_pcpu = -1; - } + __pi_post_block(vcpu); } static void vmx_post_block(struct kvm_vcpu *vcpu) -- cgit From 8b306e2f3c41939ea528e6174c88cfbfff893ce1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 6 Jun 2017 12:57:05 +0200 Subject: KVM: VMX: avoid double list add with VT-d posted interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some cases, for example involving hot-unplug of assigned devices, pi_post_block can forget to remove the vCPU from the blocked_vcpu_list. 
When this happens, the next call to pi_pre_block corrupts the list. Fix this in two ways. First, check vcpu->pre_pcpu in pi_pre_block and WARN instead of adding the element twice in the list. Second, always do the list removal in pi_post_block if vcpu->pre_pcpu is set (not -1). The new code keeps interrupts disabled for the whole duration of pi_pre_block/pi_post_block. This is not strictly necessary, but easier to follow. For the same reason, PI.ON is checked only after the cmpxchg, and to handle it we just call the post-block code. This removes duplication of the list removal code. Cc: Huangweidong Cc: Gonglei Cc: wangxin Cc: Radim Krčmář Tested-by: Longpeng (Mike) Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 62 ++++++++++++++++++++++-------------------------------- 1 file changed, 25 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0002b14307ab..0bfe97e50a40 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11710,10 +11710,11 @@ static void __pi_post_block(struct kvm_vcpu *vcpu) struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); struct pi_desc old, new; unsigned int dest; - unsigned long flags; do { old.control = new.control = pi_desc->control; + WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR, + "Wakeup handler not enabled while the VCPU is blocked\n"); dest = cpu_physical_id(vcpu->cpu); @@ -11730,14 +11731,10 @@ static void __pi_post_block(struct kvm_vcpu *vcpu) } while (cmpxchg(&pi_desc->control, old.control, new.control) != old.control); - if(vcpu->pre_pcpu != -1) { - spin_lock_irqsave( - &per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); + if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) { + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); list_del(&vcpu->blocked_vcpu_list); - spin_unlock_irqrestore( - &per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); vcpu->pre_pcpu = -1; } } @@ -11757,7 +11754,6 @@ static void __pi_post_block(struct kvm_vcpu *vcpu) */ static int pi_pre_block(struct kvm_vcpu *vcpu) { - unsigned long flags; unsigned int dest; struct pi_desc old, new; struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); @@ -11767,34 +11763,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu) !kvm_vcpu_apicv_active(vcpu)) return 0; - vcpu->pre_pcpu = vcpu->cpu; - spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); - list_add_tail(&vcpu->blocked_vcpu_list, - &per_cpu(blocked_vcpu_on_cpu, - vcpu->pre_pcpu)); - spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); + WARN_ON(irqs_disabled()); + local_irq_disable(); + if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) { + vcpu->pre_pcpu = vcpu->cpu; + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + list_add_tail(&vcpu->blocked_vcpu_list, + &per_cpu(blocked_vcpu_on_cpu, + vcpu->pre_pcpu)); + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + } do { old.control = new.control = pi_desc->control; - /* - * We should not block the vCPU if - * an interrupt is posted for it. 
- */ - if (pi_test_on(pi_desc) == 1) { - spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); - list_del(&vcpu->blocked_vcpu_list); - spin_unlock_irqrestore( - &per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); - vcpu->pre_pcpu = -1; - - return 1; - } - WARN((pi_desc->sn == 1), "Warning: SN field of posted-interrupts " "is set before blocking\n"); @@ -11819,7 +11801,12 @@ static int pi_pre_block(struct kvm_vcpu *vcpu) } while (cmpxchg(&pi_desc->control, old.control, new.control) != old.control); - return 0; + /* We should not block the vCPU if an interrupt is posted for it. */ + if (pi_test_on(pi_desc) == 1) + __pi_post_block(vcpu); + + local_irq_enable(); + return (vcpu->pre_pcpu == -1); } static int vmx_pre_block(struct kvm_vcpu *vcpu) @@ -11835,12 +11822,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu) static void pi_post_block(struct kvm_vcpu *vcpu) { - if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(vcpu)) + if (vcpu->pre_pcpu == -1) return; + WARN_ON(irqs_disabled()); + local_irq_disable(); __pi_post_block(vcpu); + local_irq_enable(); } static void vmx_post_block(struct kvm_vcpu *vcpu) -- cgit From 31afb2ea2b10a7d17ce3db4cdb0a12b63b2fe08a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 6 Jun 2017 12:57:06 +0200 Subject: KVM: VMX: simplify and fix vmx_vcpu_pi_load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The simplify part: do not touch pi_desc.nv, we can set it when the VCPU is first created. Likewise, pi_desc.sn is only handled by vmx_vcpu_pi_load, do not touch it in __pi_post_block. The fix part: do not check kvm_arch_has_assigned_device, instead check the SN bit to figure out whether vmx_vcpu_pi_put ran before. This matches what the previous patch did in pi_post_block. Cc: Huangweidong Cc: Gonglei Cc: wangxin Cc: Radim Krčmář Tested-by: Longpeng (Mike) Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 68 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 33 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0bfe97e50a40..b9d2140eb212 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2202,43 +2202,41 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) struct pi_desc old, new; unsigned int dest; - if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(vcpu)) + /* + * In case of hot-plug or hot-unplug, we may have to undo + * vmx_vcpu_pi_put even if there is no assigned device. And we + * always keep PI.NDST up to date for simplicity: it makes the + * code easier, and CPU migration is not a fast path. + */ + if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) + return; + + /* + * First handle the simple case where no cmpxchg is necessary; just + * allow posting non-urgent interrupts. + * + * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change + * PI.NDST: pi_post_block will do it for us and the wakeup_handler + * expects the VCPU to be on the blocked_vcpu_list that matches + * PI.NDST. + */ + if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || + vcpu->cpu == cpu) { + pi_clear_sn(pi_desc); return; + } + /* The full case. */ do { old.control = new.control = pi_desc->control; - /* - * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there - * are two possible cases: - * 1. After running 'pre_block', context switch - * happened. 
For this case, 'sn' was set in - * vmx_vcpu_put(), so we need to clear it here. - * 2. After running 'pre_block', we were blocked, - * and woken up by some other guy. For this case, - * we don't need to do anything, 'pi_post_block' - * will do everything for us. However, we cannot - * check whether it is case #1 or case #2 here - * (maybe, not needed), so we also clear sn here, - * I think it is not a big deal. - */ - if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) { - if (vcpu->cpu != cpu) { - dest = cpu_physical_id(cpu); - - if (x2apic_enabled()) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; - } + dest = cpu_physical_id(cpu); - /* set 'NV' to 'notification vector' */ - new.nv = POSTED_INTR_VECTOR; - } + if (x2apic_enabled()) + new.ndst = dest; + else + new.ndst = (dest << 8) & 0xFF00; - /* Allow posting non-urgent interrupts */ new.sn = 0; } while (cmpxchg(&pi_desc->control, old.control, new.control) != old.control); @@ -9592,6 +9590,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; + /* + * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR + * or POSTED_INTR_WAKEUP_VECTOR. + */ + vmx->pi_desc.nv = POSTED_INTR_VECTOR; + vmx->pi_desc.sn = 1; + return &vmx->vcpu; free_vmcs: @@ -11723,9 +11728,6 @@ static void __pi_post_block(struct kvm_vcpu *vcpu) else new.ndst = (dest << 8) & 0xFF00; - /* Allow posting non-urgent interrupts */ - new.sn = 0; - /* set 'NV' to 'notification vector' */ new.nv = POSTED_INTR_VECTOR; } while (cmpxchg(&pi_desc->control, old.control, -- cgit From 0d805ee70a69eabd38160dc199e183ac2f13fe4b Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 27 Sep 2017 02:41:25 -0700 Subject: xen/mmu: Call xen_cleanhighmap() with 4MB alignment for page table mappings When booting up a PVM guest with large memory (e.g. 240GB), the Xen-provided initial mapping overlaps with the kernel module virtual space. When the mapping in this space is cleared by xen_cleanhighmap(), in certain cases a 2MB mapping can be left behind. This is because Xen initializes a 4MB-aligned mapping while xen_cleanhighmap() finishes at a 2MB boundary. When a module is loaded just on top of that 2MB space, we get the warning below: WARNING: at mm/vmalloc.c:106 vmap_pte_range+0x14e/0x190() Call Trace: [] warn_alloc_failed+0xf3/0x160 [] __vmalloc_area_node+0x182/0x1c0 [] ? module_alloc_update_bounds+0x1e/0x80 [] __vmalloc_node_range+0xa7/0x110 [] ? module_alloc_update_bounds+0x1e/0x80 [] module_alloc+0x64/0x70 [] ? module_alloc_update_bounds+0x1e/0x80 [] module_alloc_update_bounds+0x1e/0x80 [] move_module+0x27/0x150 [] layout_and_allocate+0x120/0x1b0 [] load_module+0x78/0x640 [] ? security_file_permission+0x8b/0x90 [] sys_init_module+0x62/0x1e0 [] system_call_fastpath+0x16/0x1b Then the 2MB mapping is cleared, and we finally oops when a page in that space is accessed. BUG: unable to handle kernel paging request at ffff880022600000 IP: [] clear_page_c_e+0x7/0x10 PGD 1788067 PUD 178c067 PMD 22434067 PTE 0 Oops: 0002 [#1] SMP Call Trace: [] ? prep_new_page+0x127/0x1c0 [] get_page_from_freelist+0x1e2/0x550 [] ? ii_iovec_copy_to_user+0x90/0x140 [] __alloc_pages_nodemask+0x12d/0x230 [] alloc_pages_vma+0xc6/0x1a0 [] ? pte_mfn_to_pfn+0x7d/0x100 [] do_anonymous_page+0x16b/0x350 [] handle_pte_fault+0x1e4/0x200 [] ? xen_pmd_val+0xe/0x10 [] ? __raw_callee_save_xen_pmd_val+0x11/0x1e [] handle_mm_fault+0x15b/0x270 [] do_page_fault+0x140/0x470 [] page_fault+0x25/0x30 Fix this by calling xen_cleanhighmap() with the page table mapping range rounded up to 4MB alignment.
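To make the arithmetic concrete, here is a small standalone sketch; the addresses are hypothetical, and PMD_SIZE is hard-coded to the 2MB value used for x86-64 PMDs:

#include <stdio.h>

#define PMD_SIZE	(2UL << 20)	/* 2MB, as on x86-64 */
#define ROUNDUP(x, a)	(((x) + (a) - 1) & ~((unsigned long)(a) - 1))

int main(void)
{
	/* Hypothetical end of the Xen-provided page tables: 2MB-aligned
	 * but not 4MB-aligned. */
	unsigned long pt_end = 0x40200000UL;

	/* Old behaviour: rounding to PMD_SIZE stops 2MB short of the
	 * 4MB-aligned end of Xen's initial mapping. */
	printf("cleanup to %#lx (PMD_SIZE)\n", ROUNDUP(pt_end, PMD_SIZE));

	/* Fixed behaviour: rounding to PMD_SIZE * 2 also covers the
	 * stray 2MB mapping. */
	printf("cleanup to %#lx (PMD_SIZE * 2)\n",
	       ROUNDUP(pt_end, PMD_SIZE * 2));
	return 0;
}

Rounding to PMD_SIZE * 2 (4MB) rather than PMD_SIZE (2MB) guarantees the cleanup range always reaches the 4MB-aligned end of Xen's initial mapping.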
The unnecessary call of xen_cleanhighmap() in DEBUG mode is also removed. -v2: add comment about XEN alignment from Juergen. References: https://lists.xen.org/archives/html/xen-devel/2012-07/msg01562.html Signed-off-by: Zhenzhong Duan Reviewed-by: Juergen Gross [boris: added 'xen/mmu' tag to commit subject] Signed-off-by: Boris Ostrovsky --- arch/x86/xen/mmu_pv.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 509f560bd0c6..58b09fcadbaa 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1238,21 +1238,16 @@ static void __init xen_pagetable_cleanhighmap(void) * from _brk_limit way up to the max_pfn_mapped (which is the end of * the ramdisk). We continue on, erasing PMD entries that point to page * tables - do note that they are accessible at this stage via __va. - * For good measure we also round up to the PMD - which means that if + * As Xen is aligning the memory end to a 4MB boundary, for good + * measure we also round up to PMD_SIZE * 2 - which means that if * anybody is using __ka address to the initial boot-stack - and try * to use it - they are going to crash. The xen_start_info has been * taken care of already in xen_setup_kernel_pagetable. */ addr = xen_start_info->pt_base; - size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE); + size = xen_start_info->nr_pt_frames * PAGE_SIZE; - xen_cleanhighmap(addr, addr + size); + xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE * 2)); xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base)); -#ifdef DEBUG - /* This is superfluous and is not necessary, but you know what - * lets do it. The MODULES_VADDR -> MODULES_END should be clear of - * anything at this stage. */ - xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); -#endif } #endif -- cgit From c0a1666bcb2a33e84187a15eabdcd54056be9a97 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 28 Sep 2017 17:58:41 +0200 Subject: KVM: VMX: use cmpxchg64 This fixes a compilation failure on 32-bit systems. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b9d2140eb212..7f62c94196d1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2238,8 +2238,8 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) new.ndst = (dest << 8) & 0xFF00; new.sn = 0; - } while (cmpxchg(&pi_desc->control, old.control, - new.control) != old.control); + } while (cmpxchg64(&pi_desc->control, old.control, + new.control) != old.control); } static void decache_tsc_multiplier(struct vcpu_vmx *vmx) @@ -11730,8 +11730,8 @@ static void __pi_post_block(struct kvm_vcpu *vcpu) /* set 'NV' to 'notification vector' */ new.nv = POSTED_INTR_VECTOR; - } while (cmpxchg(&pi_desc->control, old.control, - new.control) != old.control); + } while (cmpxchg64(&pi_desc->control, old.control, + new.control) != old.control); if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) { spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); @@ -11800,8 +11800,8 @@ static int pi_pre_block(struct kvm_vcpu *vcpu) /* set 'NV' to 'wakeup vector' */ new.nv = POSTED_INTR_WAKEUP_VECTOR; - } while (cmpxchg(&pi_desc->control, old.control, - new.control) != old.control); + } while (cmpxchg64(&pi_desc->control, old.control, + new.control) != old.control); /* We should not block the vCPU if an interrupt is posted for it.
*/ if (pi_test_on(pi_desc) == 1) -- cgit From 69b73e95982649a1f2dc63b8f08f2113d28f7fed Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Sep 2017 10:07:44 +0200 Subject: um/time: Fixup namespace collision The new timer_setup() function for struct timer_list collides with a private um function. Rename it. Fixes: 686fef928bba ("timer: Prepare to change timer callback argument type") Signed-off-by: Thomas Gleixner Cc: Richard Weinberger Cc: Jeff Dike Cc: user-mode-linux-devel@lists.sourceforge.net Cc: Kees Cook --- arch/um/kernel/time.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 0b034ebbda2a..7f69d17de354 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -98,7 +98,7 @@ static struct clocksource timer_clocksource = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static void __init timer_setup(void) +static void __init um_timer_setup(void) { int err; @@ -132,5 +132,5 @@ void read_persistent_clock(struct timespec *ts) void __init time_init(void) { timer_set_signal_handler(); - late_time_init = timer_setup; + late_time_init = um_timer_setup; } -- cgit From 520a13c530aeb5f63e011d668c42db1af19ed349 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 28 Sep 2017 16:58:26 -0500 Subject: x86/asm: Fix inline asm call constraints for GCC 4.4 The kernel test bot (run by Xiaolong Ye) reported that the following commit: f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang") is causing double faults in a kernel compiled with GCC 4.4. Linus subsequently diagnosed the crash pattern and the buggy commit and found that the issue is with this code: register unsigned int __asm_call_sp asm("esp"); #define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp) Even on a 64-bit kernel, it's using ESP instead of RSP. That causes GCC to produce the following bogus code: ffffffff8147461d: 89 e0 mov %esp,%eax ffffffff8147461f: 4c 89 f7 mov %r14,%rdi ffffffff81474622: 4c 89 fe mov %r15,%rsi ffffffff81474625: ba 20 00 00 00 mov $0x20,%edx ffffffff8147462a: 89 c4 mov %eax,%esp ffffffff8147462c: e8 bf 52 05 00 callq ffffffff814c98f0 Despite the absurdity of it backing up and restoring the stack pointer for no reason, the bug is actually the fact that it's only backing up and restoring the lower 32 bits of the stack pointer. The upper 32 bits are getting cleared out, corrupting the stack pointer. So change the '__asm_call_sp' register variable to be associated with the actual full-size stack pointer. This also requires changing the __ASM_SEL() macro to be based on the actual compiled arch size, rather than the CONFIG value, because CONFIG_X86_64 compiles some files with '-m32' (e.g., realmode and vdso). Otherwise Clang fails to build the kernel because it complains about the use of a 64-bit register (RSP) in a 32-bit file. 
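For illustration, the corrected idiom looks roughly like the sketch below. This is not the kernel's actual header; the arch test mirrors the __x86_64__ check in the hunk above, and the variable name is a hypothetical stand-in:

/*
 * Select the register name from the compiled arch size, so a 64-bit
 * object ties the constraint to the full RSP while a 32-bit object
 * (including -m32 objects built under a 64-bit config) uses ESP.
 */
#ifdef __x86_64__
register unsigned long asm_call_sp asm("rsp");
#else
register unsigned long asm_call_sp asm("esp");
#endif
#define ASM_CALL_CONSTRAINT "+r" (asm_call_sp)

/* Example use: tell the compiler the stack pointer may change. */
static inline void asm_call_probe(void)
{
	asm volatile("" : ASM_CALL_CONSTRAINT);
}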
Reported-and-Bisected-and-Tested-by: kernel test robot Diagnosed-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dmitriy Vyukov Cc: LKP Cc: Linus Torvalds Cc: Matthias Kaehlcke Cc: Miguel Bernal Marin Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang") Link: http://lkml.kernel.org/r/20170928215826.6sdpmwtkiydiytim@treble Signed-off-by: Ingo Molnar --- arch/x86/include/asm/asm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index c1eadbaf1115..30c3c9ac784a 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -11,10 +11,12 @@ # define __ASM_FORM_COMMA(x) " " #x "," #endif -#ifdef CONFIG_X86_32 +#ifndef __x86_64__ +/* 32 bit */ # define __ASM_SEL(a,b) __ASM_FORM(a) # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) #else +/* 64 bit */ # define __ASM_SEL(a,b) __ASM_FORM(b) # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) #endif @@ -139,7 +141,7 @@ * gets set up by the containing function. If you forget to do this, objtool * may print a "call without frame pointer save/setup" warning. */ -register unsigned int __asm_call_sp asm("esp"); +register unsigned long __asm_call_sp asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp) #endif -- cgit From 305d0ab4764d36a02c8e7cddb67099aca65351ce Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 28 Sep 2017 18:16:44 -0700 Subject: KVM: nVMX: Fix nested #PF that intends to break L1's vmlaunch/vmresume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------[ cut here ]------------ WARNING: CPU: 4 PID: 5280 at /home/kernel/linux/arch/x86/kvm//vmx.c:11394 nested_vmx_vmexit+0xc2b/0xd70 [kvm_intel] CPU: 4 PID: 5280 Comm: qemu-system-x86 Tainted: G W OE 4.13.0+ #17 RIP: 0010:nested_vmx_vmexit+0xc2b/0xd70 [kvm_intel] Call Trace: ? emulator_read_emulated+0x15/0x20 [kvm] ? segmented_read+0xae/0xf0 [kvm] vmx_inject_page_fault_nested+0x60/0x70 [kvm_intel] ? vmx_inject_page_fault_nested+0x60/0x70 [kvm_intel] x86_emulate_instruction+0x733/0x810 [kvm] vmx_handle_exit+0x2f4/0xda0 [kvm_intel] ? kvm_arch_vcpu_ioctl_run+0xd2f/0x1c60 [kvm] kvm_arch_vcpu_ioctl_run+0xdab/0x1c60 [kvm] ? kvm_arch_vcpu_load+0x62/0x230 [kvm] kvm_vcpu_ioctl+0x340/0x700 [kvm] ? kvm_vcpu_ioctl+0x340/0x700 [kvm] ? __fget+0xfc/0x210 do_vfs_ioctl+0xa4/0x6a0 ? __fget+0x11d/0x210 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x23/0xc2 A nested #PF is triggered while L0 is emulating an instruction for L2. However, it doesn't consider that we should not break L1's vmlaunch/vmresume. This patch fixes it by queuing the #PF exception instead, requesting an immediate VM exit from L2 and keeping the exception for L1 pending for a subsequent nested VM exit. This should actually work all the time, making vmx_inject_page_fault_nested totally unnecessary. However, that's not working yet, so this patch can work around the issue in the meantime.
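In rough pseudologic, the added check amounts to the sketch below (simplified; the real code operates on vmcs12 and struct vcpu_vmx state, as the hunk below shows):

/*
 * Reflect the #PF to L1 only when L1 asked for #PF vmexits AND no
 * vmlaunch/vmresume is currently pending; otherwise keep the
 * exception queued so L1's VM entry is not broken.
 */
static int inject_pf_nested_sketch(int vmcs12_wants_pf_vmexit,
				   int nested_run_pending)
{
	if (vmcs12_wants_pf_vmexit && !nested_run_pending)
		return 1;	/* synthesize a nested vmexit to L1 */

	return 0;		/* keep the #PF queued instead */
}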
Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7f62c94196d1..5bfa353f6354 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9845,7 +9845,8 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, WARN_ON(!is_guest_mode(vcpu)); - if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) { + if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) && + !to_vmx(vcpu)->nested.nested_run_pending) { vmcs12->vm_exit_intr_error_code = fault->error_code; nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | -- cgit From b862789aa5186d5ea3a024b7cfe0f80c3a38b980 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 29 Sep 2017 19:01:45 +0800 Subject: kvm/x86: Handle async PF in RCU read-side critical sections Sasha Levin reported a WARNING: | WARNING: CPU: 0 PID: 6974 at kernel/rcu/tree_plugin.h:329 | rcu_preempt_note_context_switch kernel/rcu/tree_plugin.h:329 [inline] | WARNING: CPU: 0 PID: 6974 at kernel/rcu/tree_plugin.h:329 | rcu_note_context_switch+0x16c/0x2210 kernel/rcu/tree.c:458 ... | CPU: 0 PID: 6974 Comm: syz-fuzzer Not tainted 4.13.0-next-20170908+ #246 | Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS | 1.10.1-1ubuntu1 04/01/2014 | Call Trace: ... | RIP: 0010:rcu_preempt_note_context_switch kernel/rcu/tree_plugin.h:329 [inline] | RIP: 0010:rcu_note_context_switch+0x16c/0x2210 kernel/rcu/tree.c:458 | RSP: 0018:ffff88003b2debc8 EFLAGS: 00010002 | RAX: 0000000000000001 RBX: 1ffff1000765bd85 RCX: 0000000000000000 | RDX: 1ffff100075d7882 RSI: ffffffffb5c7da20 RDI: ffff88003aebc410 | RBP: ffff88003b2def30 R08: dffffc0000000000 R09: 0000000000000001 | R10: 0000000000000000 R11: 0000000000000000 R12: ffff88003b2def08 | R13: 0000000000000000 R14: ffff88003aebc040 R15: ffff88003aebc040 | __schedule+0x201/0x2240 kernel/sched/core.c:3292 | schedule+0x113/0x460 kernel/sched/core.c:3421 | kvm_async_pf_task_wait+0x43f/0x940 arch/x86/kernel/kvm.c:158 | do_async_page_fault+0x72/0x90 arch/x86/kernel/kvm.c:271 | async_page_fault+0x22/0x30 arch/x86/entry/entry_64.S:1069 | RIP: 0010:format_decode+0x240/0x830 lib/vsprintf.c:1996 | RSP: 0018:ffff88003b2df520 EFLAGS: 00010283 | RAX: 000000000000003f RBX: ffffffffb5d1e141 RCX: ffff88003b2df670 | RDX: 0000000000000001 RSI: dffffc0000000000 RDI: ffffffffb5d1e140 | RBP: ffff88003b2df560 R08: dffffc0000000000 R09: 0000000000000000 | R10: ffff88003b2df718 R11: 0000000000000000 R12: ffff88003b2df5d8 | R13: 0000000000000064 R14: ffffffffb5d1e140 R15: 0000000000000000 | vsnprintf+0x173/0x1700 lib/vsprintf.c:2136 | sprintf+0xbe/0xf0 lib/vsprintf.c:2386 | proc_self_get_link+0xfb/0x1c0 fs/proc/self.c:23 | get_link fs/namei.c:1047 [inline] | link_path_walk+0x1041/0x1490 fs/namei.c:2127 ... This happened when the host hit a page fault, and delivered it as an async page fault, while the guest was in an RCU read-side critical section. The guest then tries to reschedule in kvm_async_pf_task_wait(), but rcu_preempt_note_context_switch() would treat the reschedule as a sleep in an RCU read-side critical section, which is not allowed (even in preemptible RCU). Thus the WARN. To cure this, make kvm_async_pf_task_wait() go to the halt path if the PF happens in an RCU read-side critical section. Reported-by: Sasha Levin Cc: "Paul E.
McKenney" Cc: Peter Zijlstra Cc: stable@vger.kernel.org Signed-off-by: Boqun Feng Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index aa60a08b65b1..e675704fa6f7 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -140,7 +140,8 @@ void kvm_async_pf_task_wait(u32 token) n.token = token; n.cpu = smp_processor_id(); - n.halted = is_idle_task(current) || preempt_count() > 1; + n.halted = is_idle_task(current) || preempt_count() > 1 || + rcu_preempt_depth(); init_swait_queue_head(&n.wq); hlist_add_head(&n.link, &b->list); raw_spin_unlock(&b->lock); -- cgit From f069faba688701c4d56b6c3452a130f97bf02e95 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 29 Sep 2017 11:29:55 +0100 Subject: arm64: mm: Use READ_ONCE when dereferencing pointer to pte table On kernels built with support for transparent huge pages, different CPUs can access the PMD concurrently due to e.g. fast GUP or page_vma_mapped_walk and they must take care to use READ_ONCE to avoid value tearing or caching of stale values by the compiler. Unfortunately, these functions call into our pgtable macros, which don't use READ_ONCE, and compiler caching has been observed to cause the following crash during ext4 writeback: PC is at check_pte+0x20/0x170 LR is at page_vma_mapped_walk+0x2e0/0x540 [...] Process doio (pid: 2463, stack limit = 0xffff00000f2e8000) Call trace: [] check_pte+0x20/0x170 [] page_vma_mapped_walk+0x2e0/0x540 [] page_mkclean_one+0xac/0x278 [] rmap_walk_file+0xf0/0x238 [] rmap_walk+0x64/0xa0 [] page_mkclean+0x90/0xa8 [] clear_page_dirty_for_io+0x84/0x2a8 [] mpage_submit_page+0x34/0x98 [] mpage_process_page_bufs+0x164/0x170 [] mpage_prepare_extent_to_map+0x134/0x2b8 [] ext4_writepages+0x484/0xe30 [] do_writepages+0x44/0xe8 [] __filemap_fdatawrite_range+0xbc/0x110 [] file_write_and_wait_range+0x48/0xd8 [] ext4_sync_file+0x80/0x4b8 [] vfs_fsync_range+0x64/0xc0 [] SyS_msync+0x194/0x1e8 This is because page_vma_mapped_walk loads the PMD twice before calling pte_offset_map: the first time without READ_ONCE (where it gets all zeroes due to a concurrent pmdp_invalidate) and the second time with READ_ONCE (where it sees a valid table pointer due to a concurrent pmd_populate). However, the compiler inlines everything and caches the first value in a register, which is subsequently used in pte_offset_phys which returns a junk pointer that is later dereferenced when attempting to access the relevant pte. This patch fixes the issue by using READ_ONCE in pte_offset_phys to ensure that a stale value is not used. Whilst this is a point fix for a known failure (and simple to backport), a full fix moving all of our page table accessors over to {READ,WRITE}_ONCE and consistently using READ_ONCE in page_vma_mapped_walk is in the works for a future kernel release. Cc: Jon Masters Cc: Timur Tabi Cc: Fixes: f27176cfc363 ("mm: convert page_mkclean_one() to use page_vma_mapped_walk()") Tested-by: Richard Ruigrok Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index bc4e92337d16..b46e54c2399b 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -401,7 +401,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) /* Find an entry in the third-level page table. 
*/ #define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) -#define pte_offset_phys(dir,addr) (pmd_page_paddr(*(dir)) + pte_index(addr) * sizeof(pte_t)) +#define pte_offset_phys(dir,addr) (pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t)) #define pte_offset_kernel(dir,addr) ((pte_t *)__va(pte_offset_phys((dir), (addr)))) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) -- cgit From 760bfb47c36a07741a089bf6a28e854ffbee7dc9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 29 Sep 2017 12:27:41 +0100 Subject: arm64: fault: Route pte translation faults via do_translation_fault We currently route pte translation faults via do_page_fault, which elides the address check against TASK_SIZE before invoking the mm fault handling code. However, this can cause issues with the path walking code in conjunction with our word-at-a-time implementation because load_unaligned_zeropad can end up faulting in kernel space if it reads across a page boundary and runs into a page fault (e.g. by attempting to read from a guard region). In the case of such a fault, load_unaligned_zeropad has registered a fixup to shift the valid data and pad with zeroes, however the abort is reported as a level 3 translation fault and we dispatch it straight to do_page_fault, despite it being a kernel address. This results in calling a sleeping function from atomic context: BUG: sleeping function called from invalid context at arch/arm64/mm/fault.c:313 in_atomic(): 0, irqs_disabled(): 0, pid: 10290 Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [...] [] ___might_sleep+0x134/0x144 [] __might_sleep+0x7c/0x8c [] do_page_fault+0x140/0x330 [] do_mem_abort+0x54/0xb0 Exception stack(0xfffffffb20247a70 to 0xfffffffb20247ba0) [...] [] el1_da+0x18/0x78 [] path_parentat+0x44/0x88 [] filename_parentat+0x5c/0xd8 [] filename_create+0x4c/0x128 [] SyS_mkdirat+0x50/0xc8 [] el0_svc_naked+0x24/0x28 Code: 36380080 d5384100 f9400800 9402566d (d4210000) ---[ end trace 2d01889f2bca9b9f ]--- Fix this by dispatching all translation faults to do_translation_faults, which avoids invoking the page fault logic for faults on kernel addresses. Cc: Reported-by: Ankit Jain Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 89993c4be1be..2069e9bc0fca 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -651,7 +651,7 @@ static const struct fault_info fault_info[] = { { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 0 translation fault" }, { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" }, { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, - { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, { do_bad, SIGBUS, 0, "unknown 8" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, -- cgit From bc829ee36e0ec92383c9d9b88fe08f00d4d592f8 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 29 Sep 2017 11:24:19 -0500 Subject: x86/mm: Disable branch profiling in mem_encrypt.c Some routines in mem_encrypt.c are called very early in the boot process, e.g. sme_encrypt_kernel(). 
When CONFIG_TRACE_BRANCH_PROFILING=y is defined, the resulting branch profiling associated with the check to see if SME is active results in a kernel crash. Disable branch profiling for mem_encrypt.c by defining DISABLE_BRANCH_PROFILING before including any header files. Reported-by: kernel test robot Signed-off-by: Tom Lendacky Acked-by: Borislav Petkov Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170929162419.6016.53390.stgit@tlendack-t1.amdoffice.net Signed-off-by: Ingo Molnar --- arch/x86/mm/mem_encrypt.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 3fcc8e01683b..16c5f37933a2 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -10,6 +10,8 @@ * published by the Free Software Foundation. */ +#define DISABLE_BRANCH_PROFILING + #include #include #include -- cgit From 196bd485ee4f03ce4c690bfcf38138abfcd0a4bc Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 29 Sep 2017 17:15:36 +0300 Subject: x86/asm: Use register variable to get stack pointer value Currently we use the current_stack_pointer() function to get the value of the stack pointer register. Since commit: f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang") ... we have a stack register variable declared. It can be used instead of the current_stack_pointer() function, which allows us to optimize away some excessive "mov %rsp, %" instructions: -mov %rsp,%rdx -sub %rdx,%rax -cmp $0x3fff,%rax -ja ffffffff810722fd +sub %rsp,%rax +cmp $0x3fff,%rax +ja ffffffff810722fa Remove current_stack_pointer(), rename __asm_call_sp to current_stack_pointer and use it instead of the removed function. Signed-off-by: Andrey Ryabinin Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170929141537.29167-1-aryabinin@virtuozzo.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/asm.h | 4 ++-- arch/x86/include/asm/thread_info.h | 11 ----------- arch/x86/kernel/irq_32.c | 6 +++--- arch/x86/kernel/traps.c | 2 +- arch/x86/mm/tlb.c | 2 +- 5 files changed, 7 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 30c3c9ac784a..b0dc91f4bedc 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -141,8 +141,8 @@ * gets set up by the containing function. If you forget to do this, objtool * may print a "call without frame pointer save/setup" warning. */ -register unsigned long __asm_call_sp asm(_ASM_SP); -#define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp) +register unsigned long current_stack_pointer asm(_ASM_SP); +#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) #endif #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 5161da1a0fa0..89e7eeb5cec1 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -158,17 +158,6 @@ struct thread_info { */ #ifndef __ASSEMBLY__ -static inline unsigned long current_stack_pointer(void) -{ - unsigned long sp; -#ifdef CONFIG_X86_64 - asm("mov %%rsp,%0" : "=g" (sp)); -#else - asm("mov %%esp,%0" : "=g" (sp)); -#endif - return sp; -} - /* * Walks up the stack frames to make sure that the specified object is * entirely contained by a single stack frame.
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 1f38d9a4d9de..d4eb450144fd 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -64,7 +64,7 @@ static void call_on_stack(void *func, void *stack) static inline void *current_stack(void) { - return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); + return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); } static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) @@ -88,7 +88,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) /* Save the next esp at the bottom of the stack */ prev_esp = (u32 *)irqstk; - *prev_esp = current_stack_pointer(); + *prev_esp = current_stack_pointer; if (unlikely(overflow)) call_on_stack(print_stack_overflow, isp); @@ -139,7 +139,7 @@ void do_softirq_own_stack(void) /* Push the previous esp onto the stack */ prev_esp = (u32 *)irqstk; - *prev_esp = current_stack_pointer(); + *prev_esp = current_stack_pointer; call_on_stack(__do_softirq, isp); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 34ea3651362e..67db4f43309e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -142,7 +142,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) * from double_fault. */ BUG_ON((unsigned long)(current_top_of_stack() - - current_stack_pointer()) >= THREAD_SIZE); + current_stack_pointer) >= THREAD_SIZE); preempt_enable_no_resched(); } diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 93fe97cce581..49d9778376d7 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -191,7 +191,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * mapped in the new pgd, we'll double-fault. Forcibly * map it. */ - unsigned int index = pgd_index(current_stack_pointer()); + unsigned int index = pgd_index(current_stack_pointer); pgd_t *pgd = next->pgd + index; if (unlikely(pgd_none(*pgd))) -- cgit
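As a closing illustration of the idiom this last patch adopts, here is a hedged userspace-style sketch. It assumes x86-64, THREAD_SIZE_SKETCH is a stand-in value rather than the kernel's configuration, and the masking mirrors current_stack() in the irq_32.c hunk above:

#define THREAD_SIZE_SKETCH	(16UL * 1024)

/* A register variable reads the stack pointer with no explicit mov. */
register unsigned long current_stack_pointer asm("rsp");

static inline unsigned long current_stack_base(void)
{
	/* Mask off the low bits to find the base of the current stack. */
	return current_stack_pointer & ~(THREAD_SIZE_SKETCH - 1);
}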