From d8eabc37310a92df40d07c5a8afc53cebf996716 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Feb 2019 12:36:50 +0100 Subject: x86/msr-index: Cleanup bit defines Greg pointed out that speculation related bit defines are using (1 << N) format instead of BIT(N). Aside of that (1 << N) is wrong as it should use 1UL at least. Clean it up. [ Josh Poimboeuf: Fix tools build ] Reported-by: Greg Kroah-Hartman Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters --- arch/x86/include/asm/msr-index.h | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 8e40c2446fd1..e4074556c37b 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_MSR_INDEX_H #define _ASM_X86_MSR_INDEX_H +#include + /* * CPU model specific register (MSR) numbers. * @@ -40,14 +42,14 @@ /* Intel MSRs. Some also available on other CPUs */ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ -#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ +#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ -#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ +#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ -#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ -#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ +#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ #define MSR_PPIN_CTL 0x0000004e #define MSR_PPIN 0x0000004f @@ -69,20 +71,20 @@ #define MSR_MTRRcap 0x000000fe #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a -#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ -#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ -#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH (1 << 3) /* Skip L1D flush on vmentry */ -#define ARCH_CAP_SSB_NO (1 << 4) /* - * Not susceptible to Speculative Store Bypass - * attack, so no Speculative Store Bypass - * control required. - */ +#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ +#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ +#define ARCH_CAP_SSB_NO BIT(4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Speculative Store Bypass + * control required. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b -#define L1D_FLUSH (1 << 0) /* - * Writeback and invalidate the - * L1 data cache. - */ +#define L1D_FLUSH BIT(0) /* + * Writeback and invalidate the + * L1 data cache. + */ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e -- cgit From 36ad35131adacc29b328b9c8b6277a8bf0d6fd5d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 27 Feb 2019 10:10:23 +0100 Subject: x86/speculation: Consolidate CPU whitelists The CPU vulnerability whitelists have some overlap and there are more whitelists coming along. Use the driver_data field in the x86_cpu_id struct to denote the whitelisted vulnerabilities and combine all whitelists into one. Suggested-by: Linus Torvalds Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Reviewed-by: Jon Masters Tested-by: Jon Masters --- arch/x86/kernel/cpu/common.c | 110 +++++++++++++++++++++++-------------------- 1 file changed, 60 insertions(+), 50 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cb28e98a0659..26ec15034f86 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -948,61 +948,72 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } -static const __initconst struct x86_cpu_id cpu_no_speculation[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_TABLET, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL_MID, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_MID, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL, X86_FEATURE_ANY }, - { X86_VENDOR_CENTAUR, 5 }, - { X86_VENDOR_INTEL, 5 }, - { X86_VENDOR_NSC, 5 }, - { X86_VENDOR_ANY, 4 }, +#define NO_SPECULATION BIT(0) +#define NO_MELTDOWN BIT(1) +#define NO_SSB BIT(2) +#define NO_L1TF BIT(3) + +#define VULNWL(_vendor, _family, _model, _whitelist) \ + { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } + +#define VULNWL_INTEL(model, whitelist) \ + VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist) + +#define VULNWL_AMD(family, whitelist) \ + VULNWL(AMD, family, X86_MODEL_ANY, whitelist) + +#define VULNWL_HYGON(family, whitelist) \ + VULNWL(HYGON, family, X86_MODEL_ANY, whitelist) + +static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { + VULNWL(ANY, 4, X86_MODEL_ANY, NO_SPECULATION), + VULNWL(CENTAUR, 5, X86_MODEL_ANY, NO_SPECULATION), + VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION), + VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), + + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION), + VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION), + VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION), + VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), + VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), + + VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF), + VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF), + VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF), + VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF), + VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF), + VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF), + + VULNWL_INTEL(CORE_YONAH, NO_SSB), + + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT, NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT_X, NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_L1TF), + + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF), + + /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF), {} }; -static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { - { X86_VENDOR_AMD }, - { X86_VENDOR_HYGON }, - {} -}; - -/* Only list CPUs which speculate but are non susceptible to SSB */ -static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, - { X86_VENDOR_AMD, 0x12, }, - { X86_VENDOR_AMD, 0x11, }, - { X86_VENDOR_AMD, 0x10, }, - { X86_VENDOR_AMD, 0xf, }, - {} -}; +static bool __init cpu_matches(unsigned long which) +{ + const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist); -static const __initconst struct x86_cpu_id cpu_no_l1tf[] = { - /* in addition to cpu_no_speculation */ - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT_MID }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_X }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_PLUS }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, - {} -}; + return m && !!(m->driver_data & which); +} static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; - if (x86_match_cpu(cpu_no_speculation)) + if (cpu_matches(NO_SPECULATION)) return; setup_force_cpu_bug(X86_BUG_SPECTRE_V1); @@ -1011,15 +1022,14 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); - if (!x86_match_cpu(cpu_no_spec_store_bypass) && - !(ia32_cap & ARCH_CAP_SSB_NO) && + if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (ia32_cap & ARCH_CAP_IBRS_ALL) setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); - if (x86_match_cpu(cpu_no_meltdown)) + if (cpu_matches(NO_MELTDOWN)) return; /* Rogue Data Cache Load? No! */ @@ -1028,7 +1038,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - if (x86_match_cpu(cpu_no_l1tf)) + if (cpu_matches(NO_L1TF)) return; setup_force_cpu_bug(X86_BUG_L1TF); -- cgit From ed5194c2732c8084af9fd159c146ea92bf137128 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 18 Jan 2019 16:50:16 -0800 Subject: x86/speculation/mds: Add basic bug infrastructure for MDS Microarchitectural Data Sampling (MDS), is a class of side channel attacks on internal buffers in Intel CPUs. The variants are: - Microarchitectural Store Buffer Data Sampling (MSBDS) (CVE-2018-12126) - Microarchitectural Fill Buffer Data Sampling (MFBDS) (CVE-2018-12130) - Microarchitectural Load Port Data Sampling (MLPDS) (CVE-2018-12127) MSBDS leaks Store Buffer Entries which can be speculatively forwarded to a dependent load (store-to-load forwarding) as an optimization. The forward can also happen to a faulting or assisting load operation for a different memory address, which can be exploited under certain conditions. Store buffers are partitioned between Hyper-Threads so cross thread forwarding is not possible. But if a thread enters or exits a sleep state the store buffer is repartitioned which can expose data from one thread to the other. MFBDS leaks Fill Buffer Entries. Fill buffers are used internally to manage L1 miss situations and to hold data which is returned or sent in response to a memory or I/O operation. Fill buffers can forward data to a load operation and also write data to the cache. When the fill buffer is deallocated it can retain the stale data of the preceding operations which can then be forwarded to a faulting or assisting load operation, which can be exploited under certain conditions. Fill buffers are shared between Hyper-Threads so cross thread leakage is possible. MLDPS leaks Load Port Data. Load ports are used to perform load operations from memory or I/O. The received data is then forwarded to the register file or a subsequent operation. In some implementations the Load Port can contain stale data from a previous operation which can be forwarded to faulting or assisting loads under certain conditions, which again can be exploited eventually. Load ports are shared between Hyper-Threads so cross thread leakage is possible. All variants have the same mitigation for single CPU thread case (SMT off), so the kernel can treat them as one MDS issue. Add the basic infrastructure to detect if the current CPU is affected by MDS. [ tglx: Rewrote changelog ] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/include/asm/msr-index.h | 5 +++++ arch/x86/kernel/cpu/common.c | 25 ++++++++++++++++--------- 3 files changed, 23 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 6d6122524711..ae3f987b24f1 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -344,6 +344,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ @@ -381,5 +382,6 @@ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ +#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index e4074556c37b..e2d30636c03f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -79,6 +79,11 @@ * attack, so no Speculative Store Bypass * control required. */ +#define ARCH_CAP_MDS_NO BIT(5) /* + * Not susceptible to + * Microarchitectural Data + * Sampling (MDS) vulnerabilities. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 26ec15034f86..e34817bca504 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -952,6 +952,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_MELTDOWN BIT(1) #define NO_SSB BIT(2) #define NO_L1TF BIT(3) +#define NO_MDS BIT(4) #define VULNWL(_vendor, _family, _model, _whitelist) \ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } @@ -971,6 +972,7 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION), VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), + /* Intel Family 6 */ VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION), VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION), VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION), @@ -987,18 +989,20 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(CORE_YONAH, NO_SSB), VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF), - VULNWL_INTEL(ATOM_GOLDMONT, NO_L1TF), - VULNWL_INTEL(ATOM_GOLDMONT_X, NO_L1TF), - VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_L1TF), - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF), + + /* AMD Family 0xf - 0x12 */ + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS), {} }; @@ -1029,6 +1033,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (ia32_cap & ARCH_CAP_IBRS_ALL) setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); + if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) + setup_force_cpu_bug(X86_BUG_MDS); + if (cpu_matches(NO_MELTDOWN)) return; -- cgit From e261f209c3666e842fd645a1e31f001c3a26def9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 1 Mar 2019 20:21:08 +0100 Subject: x86/speculation/mds: Add BUG_MSBDS_ONLY This bug bit is set on CPUs which are only affected by Microarchitectural Store Buffer Data Sampling (MSBDS) and not by any other MDS variant. This is important because the Store Buffers are partitioned between Hyper-Threads so cross thread forwarding is not possible. But if a thread enters or exits a sleep state the store buffer is repartitioned which can expose data from one thread to the other. This transition can be mitigated. That means that for CPUs which are only affected by MSBDS SMT can be enabled, if the CPU is not affected by other SMT sensitive vulnerabilities, e.g. L1TF. The XEON PHI variants fall into that category. Also the Silvermont/Airmont ATOMs, but for them it's not really relevant as they do not support SMT, but mark them for completeness sake. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/common.c | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ae3f987b24f1..bdcea163850a 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -383,5 +383,6 @@ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ #define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e34817bca504..132a63dc5a76 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -953,6 +953,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_SSB BIT(2) #define NO_L1TF BIT(3) #define NO_MDS BIT(4) +#define MSBDS_ONLY BIT(5) #define VULNWL(_vendor, _family, _model, _whitelist) \ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } @@ -979,16 +980,16 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), - VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF), - VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF), - VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF), - VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF), - VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF), - VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF), + VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY), VULNWL_INTEL(CORE_YONAH, NO_SSB), - VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF), + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY), VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF), VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF), @@ -1033,8 +1034,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (ia32_cap & ARCH_CAP_IBRS_ALL) setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); - if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) + if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) { setup_force_cpu_bug(X86_BUG_MDS); + if (cpu_matches(MSBDS_ONLY)) + setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); + } if (cpu_matches(NO_MELTDOWN)) return; -- cgit From 6c4dbbd14730c43f4ed808a9c42ca41625925c22 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 18 Jan 2019 16:50:23 -0800 Subject: x86/kvm: Expose X86_FEATURE_MD_CLEAR to guests X86_FEATURE_MD_CLEAR is a new CPUID bit which is set when microcode provides the mechanism to invoke a flush of various exploitable CPU buffers by invoking the VERW instruction. Hand it through to guests so they can adjust their mitigations. This also requires corresponding qemu changes, which are available separately. [ tglx: Massaged changelog ] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters --- arch/x86/kvm/cpuid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c07958b59f50..39501e7afdb4 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -410,7 +410,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | - F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP); + F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | + F(MD_CLEAR); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); -- cgit From 6a9e529272517755904b7afa639f6db59ddb793e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 18 Feb 2019 23:13:06 +0100 Subject: x86/speculation/mds: Add mds_clear_cpu_buffers() The Microarchitectural Data Sampling (MDS) vulernabilities are mitigated by clearing the affected CPU buffers. The mechanism for clearing the buffers uses the unused and obsolete VERW instruction in combination with a microcode update which triggers a CPU buffer clear when VERW is executed. Provide a inline function with the assembly magic. The argument of the VERW instruction must be a memory operand as documented: "MD_CLEAR enumerates that the memory-operand variant of VERW (for example, VERW m16) has been extended to also overwrite buffers affected by MDS. This buffer overwriting functionality is not guaranteed for the register operand variant of VERW." Documentation also recommends to use a writable data segment selector: "The buffer overwriting occurs regardless of the result of the VERW permission check, as well as when the selector is null or causes a descriptor load segment violation. However, for lowest latency we recommend using a selector that indicates a valid writable data segment." Add x86 specific documentation about MDS and the internal workings of the mitigation. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters --- arch/x86/include/asm/nospec-branch.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index dad12b767ba0..67cb9b2082b1 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -318,6 +318,31 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); +#include + +/** + * mds_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * This uses the otherwise unused and obsolete VERW instruction in + * combination with microcode which triggers a CPU buffer flush when the + * instruction is executed. + */ +static inline void mds_clear_cpu_buffers(void) +{ + static const u16 ds = __KERNEL_DS; + + /* + * Has to be the memory-operand variant because only that + * guarantees the CPU buffer flush functionality according to + * documentation. The register-operand variant does not. + * Works with any segment selector, but a valid writable + * data segment is the fastest variant. + * + * "cc" clobber is required because VERW modifies ZF. + */ + asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); +} + #endif /* __ASSEMBLY__ */ /* -- cgit From 04dcbdb8057827b043b3c71aa397c4c63e67d086 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 18 Feb 2019 23:42:51 +0100 Subject: x86/speculation/mds: Clear CPU buffers on exit to user Add a static key which controls the invocation of the CPU buffer clear mechanism on exit to user space and add the call into prepare_exit_to_usermode() and do_nmi() right before actually returning. Add documentation which kernel to user space transition this covers and explain why some corner cases are not mitigated. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters --- arch/x86/entry/common.c | 3 +++ arch/x86/include/asm/nospec-branch.h | 13 +++++++++++++ arch/x86/kernel/cpu/bugs.c | 3 +++ arch/x86/kernel/nmi.c | 4 ++++ arch/x86/kernel/traps.c | 8 ++++++++ 5 files changed, 31 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 7bc105f47d21..19f650d729f5 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -31,6 +31,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -212,6 +213,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) #endif user_enter_irqoff(); + + mds_user_clear_cpu_buffers(); } #define SYSCALL_EXIT_WORK_FLAGS \ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 67cb9b2082b1..65b747286d96 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -318,6 +318,8 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); +DECLARE_STATIC_KEY_FALSE(mds_user_clear); + #include /** @@ -343,6 +345,17 @@ static inline void mds_clear_cpu_buffers(void) asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); } +/** + * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled + */ +static inline void mds_user_clear_cpu_buffers(void) +{ + if (static_branch_likely(&mds_user_clear)) + mds_clear_cpu_buffers(); +} + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 01874d54f4fd..dbb45014de1b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -63,6 +63,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); /* Control unconditional IBPB in switch_mm() */ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); +/* Control MDS CPU buffer clear before returning to user space */ +DEFINE_STATIC_KEY_FALSE(mds_user_clear); + void __init check_bugs(void) { identify_boot_cpu(); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 18bc9b51ac9b..086cf1d1d71d 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -34,6 +34,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -533,6 +534,9 @@ nmi_restart: write_cr2(this_cpu_read(nmi_cr2)); if (this_cpu_dec_return(nmi_state)) goto nmi_restart; + + if (user_mode(regs)) + mds_user_clear_cpu_buffers(); } NOKPROBE_SYMBOL(do_nmi); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 9b7c4ca8f0a7..85fe1870f873 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include @@ -366,6 +367,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) regs->ip = (unsigned long)general_protection; regs->sp = (unsigned long)&gpregs->orig_ax; + /* + * This situation can be triggered by userspace via + * modify_ldt(2) and the return does not take the regular + * user space exit, so a CPU buffer clear is required when + * MDS mitigation is enabled. + */ + mds_user_clear_cpu_buffers(); return; } #endif -- cgit From 650b68a0622f933444a6d66936abb3103029413b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 27 Feb 2019 12:48:14 +0100 Subject: x86/kvm/vmx: Add MDS protection when L1D Flush is not active CPUs which are affected by L1TF and MDS mitigate MDS with the L1D Flush on VMENTER when updated microcode is installed. If a CPU is not affected by L1TF or if the L1D Flush is not in use, then MDS mitigation needs to be invoked explicitly. For these cases, follow the host mitigation state and invoke the MDS mitigation before VMENTER. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Frederic Weisbecker Reviewed-by: Borislav Petkov Reviewed-by: Jon Masters Tested-by: Jon Masters --- arch/x86/kernel/cpu/bugs.c | 1 + arch/x86/kvm/vmx/vmx.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index dbb45014de1b..29ed8e8dfee2 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -65,6 +65,7 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); /* Control MDS CPU buffer clear before returning to user space */ DEFINE_STATIC_KEY_FALSE(mds_user_clear); +EXPORT_SYMBOL_GPL(mds_user_clear); void __init check_bugs(void) { diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 30a6bcd735ec..544bd24a9c1e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6369,8 +6369,11 @@ static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) evmcs_rsp = static_branch_unlikely(&enable_evmcs) ? (unsigned long)¤t_evmcs->host_rsp : 0; + /* L1D Flush includes CPU buffer clear to mitigate MDS */ if (static_branch_unlikely(&vmx_l1d_should_flush)) vmx_l1d_flush(vcpu); + else if (static_branch_unlikely(&mds_user_clear)) + mds_clear_cpu_buffers(); asm( /* Store host registers */ -- cgit From 07f07f55a29cb705e221eda7894dd67ab81ef343 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 18 Feb 2019 23:04:01 +0100 Subject: x86/speculation/mds: Conditionally clear CPU buffers on idle entry Add a static key which controls the invocation of the CPU buffer clear mechanism on idle entry. This is independent of other MDS mitigations because the idle entry invocation to mitigate the potential leakage due to store buffer repartitioning is only necessary on SMT systems. Add the actual invocations to the different halt/mwait variants which covers all usage sites. mwaitx is not patched as it's not available on Intel CPUs. The buffer clear is only invoked before entering the C-State to prevent that stale data from the idling CPU is spilled to the Hyper-Thread sibling after the Store buffer got repartitioned and all entries are available to the non idle sibling. When coming out of idle the store buffer is partitioned again so each sibling has half of it available. Now CPU which returned from idle could be speculatively exposed to contents of the sibling, but the buffers are flushed either on exit to user space or on VMENTER. When later on conditional buffer clearing is implemented on top of this, then there is no action required either because before returning to user space the context switch will set the condition flag which causes a flush on the return to user path. Note, that the buffer clearing on idle is only sensible on CPUs which are solely affected by MSBDS and not any other variant of MDS because the other MDS variants cannot be mitigated when SMT is enabled, so the buffer clearing on idle would be a window dressing exercise. This intentionally does not handle the case in the acpi/processor_idle driver which uses the legacy IO port interface for C-State transitions for two reasons: - The acpi/processor_idle driver was replaced by the intel_idle driver almost a decade ago. Anything Nehalem upwards supports it and defaults to that new driver. - The legacy IO port interface is likely to be used on older and therefore unaffected CPUs or on systems which do not receive microcode updates anymore, so there is no point in adding that. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters --- arch/x86/include/asm/irqflags.h | 4 ++++ arch/x86/include/asm/mwait.h | 7 +++++++ arch/x86/include/asm/nospec-branch.h | 12 ++++++++++++ arch/x86/kernel/cpu/bugs.c | 3 +++ 4 files changed, 26 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 058e40fed167..8a0e56e1dcc9 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -6,6 +6,8 @@ #ifndef __ASSEMBLY__ +#include + /* Provide __cpuidle; we can't safely include */ #define __cpuidle __attribute__((__section__(".cpuidle.text"))) @@ -54,11 +56,13 @@ static inline void native_irq_enable(void) static inline __cpuidle void native_safe_halt(void) { + mds_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } static inline __cpuidle void native_halt(void) { + mds_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 39a2fb29378a..eb0f80ce8524 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -6,6 +6,7 @@ #include #include +#include #define MWAIT_SUBSTATE_MASK 0xf #define MWAIT_CSTATE_MASK 0xf @@ -40,6 +41,8 @@ static inline void __monitorx(const void *eax, unsigned long ecx, static inline void __mwait(unsigned long eax, unsigned long ecx) { + mds_idle_clear_cpu_buffers(); + /* "mwait %eax, %ecx;" */ asm volatile(".byte 0x0f, 0x01, 0xc9;" :: "a" (eax), "c" (ecx)); @@ -74,6 +77,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) static inline void __mwaitx(unsigned long eax, unsigned long ebx, unsigned long ecx) { + /* No MDS buffer clear as this is AMD/HYGON only */ + /* "mwaitx %eax, %ebx, %ecx;" */ asm volatile(".byte 0x0f, 0x01, 0xfb;" :: "a" (eax), "b" (ebx), "c" (ecx)); @@ -81,6 +86,8 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, static inline void __sti_mwait(unsigned long eax, unsigned long ecx) { + mds_idle_clear_cpu_buffers(); + trace_hardirqs_on(); /* "mwait %eax, %ecx;" */ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 65b747286d96..4e970390110f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -319,6 +319,7 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DECLARE_STATIC_KEY_FALSE(mds_user_clear); +DECLARE_STATIC_KEY_FALSE(mds_idle_clear); #include @@ -356,6 +357,17 @@ static inline void mds_user_clear_cpu_buffers(void) mds_clear_cpu_buffers(); } +/** + * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled + */ +static inline void mds_idle_clear_cpu_buffers(void) +{ + if (static_branch_likely(&mds_idle_clear)) + mds_clear_cpu_buffers(); +} + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 29ed8e8dfee2..916995167301 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -66,6 +66,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); /* Control MDS CPU buffer clear before returning to user space */ DEFINE_STATIC_KEY_FALSE(mds_user_clear); EXPORT_SYMBOL_GPL(mds_user_clear); +/* Control MDS CPU buffer clear before idling (halt, mwait) */ +DEFINE_STATIC_KEY_FALSE(mds_idle_clear); +EXPORT_SYMBOL_GPL(mds_idle_clear); void __init check_bugs(void) { -- cgit From bc1241700acd82ec69fde98c5763ce51086269f8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 18 Feb 2019 22:04:08 +0100 Subject: x86/speculation/mds: Add mitigation control for MDS Now that the mitigations are in place, add a command line parameter to control the mitigation, a mitigation selector function and a SMT update mechanism. This is the minimal straight forward initial implementation which just provides an always on/off mode. The command line parameter is: mds=[full|off] This is consistent with the existing mitigations for other speculative hardware vulnerabilities. The idle invocation is dynamically updated according to the SMT state of the system similar to the dynamic update of the STIBP mitigation. The idle mitigation is limited to CPUs which are only affected by MSBDS and not any other variant, because the other variants cannot be mitigated on SMT enabled systems. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Jon Masters Tested-by: Jon Masters --- arch/x86/include/asm/processor.h | 5 +++ arch/x86/kernel/cpu/bugs.c | 70 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 33051436c864..1f0295783325 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -992,4 +992,9 @@ enum l1tf_mitigations { extern enum l1tf_mitigations l1tf_mitigation; +enum mds_mitigations { + MDS_MITIGATION_OFF, + MDS_MITIGATION_FULL, +}; + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 916995167301..c7b29d200d27 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -37,6 +37,7 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); +static void __init mds_select_mitigation(void); /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ u64 x86_spec_ctrl_base; @@ -108,6 +109,8 @@ void __init check_bugs(void) l1tf_select_mitigation(); + mds_select_mitigation(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -213,6 +216,50 @@ static void x86_amd_ssb_disable(void) wrmsrl(MSR_AMD64_LS_CFG, msrval); } +#undef pr_fmt +#define pr_fmt(fmt) "MDS: " fmt + +/* Default mitigation for L1TF-affected CPUs */ +static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL; + +static const char * const mds_strings[] = { + [MDS_MITIGATION_OFF] = "Vulnerable", + [MDS_MITIGATION_FULL] = "Mitigation: Clear CPU buffers" +}; + +static void __init mds_select_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_MDS)) { + mds_mitigation = MDS_MITIGATION_OFF; + return; + } + + if (mds_mitigation == MDS_MITIGATION_FULL) { + if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) + static_branch_enable(&mds_user_clear); + else + mds_mitigation = MDS_MITIGATION_OFF; + } + pr_info("%s\n", mds_strings[mds_mitigation]); +} + +static int __init mds_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_MDS)) + return 0; + + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + mds_mitigation = MDS_MITIGATION_OFF; + else if (!strcmp(str, "full")) + mds_mitigation = MDS_MITIGATION_FULL; + + return 0; +} +early_param("mds", mds_cmdline); + #undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt @@ -617,6 +664,26 @@ static void update_indir_branch_cond(void) static_branch_disable(&switch_to_cond_stibp); } +/* Update the static key controlling the MDS CPU buffer clear in idle */ +static void update_mds_branch_idle(void) +{ + /* + * Enable the idle clearing if SMT is active on CPUs which are + * affected only by MSBDS and not any other MDS variant. + * + * The other variants cannot be mitigated when SMT is enabled, so + * clearing the buffers on idle just to prevent the Store Buffer + * repartitioning leak would be a window dressing exercise. + */ + if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) + return; + + if (sched_smt_active()) + static_branch_enable(&mds_idle_clear); + else + static_branch_disable(&mds_idle_clear); +} + void arch_smt_update(void) { /* Enhanced IBRS implies STIBP. No update required. */ @@ -638,6 +705,9 @@ void arch_smt_update(void) break; } + if (mds_mitigation == MDS_MITIGATION_FULL) + update_mds_branch_idle(); + mutex_unlock(&spec_ctrl_mutex); } -- cgit From 8a4b06d391b0a42a373808979b5028f5c84d9c6a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 18 Feb 2019 22:51:43 +0100 Subject: x86/speculation/mds: Add sysfs reporting for MDS Add the sysfs reporting file for MDS. It exposes the vulnerability and mitigation state similar to the existing files for the other speculative hardware vulnerabilities. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Reviewed-by: Jon Masters Tested-by: Jon Masters --- arch/x86/kernel/cpu/bugs.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c7b29d200d27..7ab16a6ed064 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1172,6 +1172,22 @@ static ssize_t l1tf_show_state(char *buf) } #endif +static ssize_t mds_show_state(char *buf) +{ + if (!hypervisor_is_type(X86_HYPER_NATIVE)) { + return sprintf(buf, "%s; SMT Host state unknown\n", + mds_strings[mds_mitigation]); + } + + if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) { + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + sched_smt_active() ? "mitigated" : "disabled"); + } + + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); +} + static char *stibp_state(void) { if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) @@ -1238,6 +1254,10 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV)) return l1tf_show_state(buf); break; + + case X86_BUG_MDS: + return mds_show_state(buf); + default: break; } @@ -1269,4 +1289,9 @@ ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *b { return cpu_show_common(dev, attr, buf, X86_BUG_L1TF); } + +ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_MDS); +} #endif -- cgit From 22dd8365088b6403630b82423cf906491859b65e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Feb 2019 09:40:40 +0100 Subject: x86/speculation/mds: Add mitigation mode VMWERV In virtualized environments it can happen that the host has the microcode update which utilizes the VERW instruction to clear CPU buffers, but the hypervisor is not yet updated to expose the X86_FEATURE_MD_CLEAR CPUID bit to guests. Introduce an internal mitigation mode VMWERV which enables the invocation of the CPU buffer clearing even if X86_FEATURE_MD_CLEAR is not set. If the system has no updated microcode this results in a pointless execution of the VERW instruction wasting a few CPU cycles. If the microcode is updated, but not exposed to a guest then the CPU buffers will be cleared. That said: Virtual Machines Will Eventually Receive Vaccine Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Jon Masters Tested-by: Jon Masters --- arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/cpu/bugs.c | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1f0295783325..aca1ef8cc79f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -995,6 +995,7 @@ extern enum l1tf_mitigations l1tf_mitigation; enum mds_mitigations { MDS_MITIGATION_OFF, MDS_MITIGATION_FULL, + MDS_MITIGATION_VMWERV, }; #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7ab16a6ed064..95cda38c8785 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -224,7 +224,8 @@ static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL static const char * const mds_strings[] = { [MDS_MITIGATION_OFF] = "Vulnerable", - [MDS_MITIGATION_FULL] = "Mitigation: Clear CPU buffers" + [MDS_MITIGATION_FULL] = "Mitigation: Clear CPU buffers", + [MDS_MITIGATION_VMWERV] = "Vulnerable: Clear CPU buffers attempted, no microcode", }; static void __init mds_select_mitigation(void) @@ -235,10 +236,9 @@ static void __init mds_select_mitigation(void) } if (mds_mitigation == MDS_MITIGATION_FULL) { - if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) - static_branch_enable(&mds_user_clear); - else - mds_mitigation = MDS_MITIGATION_OFF; + if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) + mds_mitigation = MDS_MITIGATION_VMWERV; + static_branch_enable(&mds_user_clear); } pr_info("%s\n", mds_strings[mds_mitigation]); } @@ -705,8 +705,14 @@ void arch_smt_update(void) break; } - if (mds_mitigation == MDS_MITIGATION_FULL) + switch (mds_mitigation) { + case MDS_MITIGATION_FULL: + case MDS_MITIGATION_VMWERV: update_mds_branch_idle(); + break; + case MDS_MITIGATION_OFF: + break; + } mutex_unlock(&spec_ctrl_mutex); } -- cgit From 65fd4cb65b2dad97feb8330b6690445910b56d6a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 Feb 2019 11:10:49 +0100 Subject: Documentation: Move L1TF to separate directory Move L!TF to a separate directory so the MDS stuff can be added at the side. Otherwise the all hardware vulnerabilites have their own top level entry. Should have done that right away. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jon Masters --- arch/x86/kernel/cpu/bugs.c | 2 +- arch/x86/kvm/vmx/vmx.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 95cda38c8785..373ae1dcd301 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1107,7 +1107,7 @@ static void __init l1tf_select_mitigation(void) pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n", half_pa); pr_info("However, doing so will make a part of your RAM unusable.\n"); - pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html might help you decide.\n"); + pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html might help you decide.\n"); return; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 544bd24a9c1e..b0597507bde7 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6801,8 +6801,8 @@ free_partial_vcpu: return ERR_PTR(err); } -#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" -#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" +#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" +#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" static int vmx_vm_init(struct kvm *kvm) { -- cgit From a07b20004793d8926f78d63eb5980559f7813404 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 5 Nov 2018 17:40:30 +0000 Subject: vfs: syscall: Add open_tree(2) to reference or clone a mount open_tree(dfd, pathname, flags) Returns an O_PATH-opened file descriptor or an error. dfd and pathname specify the location to open, in usual fashion (see e.g. fstatat(2)). flags should be an OR of some of the following: * AT_PATH_EMPTY, AT_NO_AUTOMOUNT, AT_SYMLINK_NOFOLLOW - same meanings as usual * OPEN_TREE_CLOEXEC - make the resulting descriptor close-on-exec * OPEN_TREE_CLONE or OPEN_TREE_CLONE | AT_RECURSIVE - instead of opening the location in question, create a detached mount tree matching the subtree rooted at location specified by dfd/pathname. With AT_RECURSIVE the entire subtree is cloned, without it - only the part within in the mount containing the location in question. In other words, the same as mount --rbind or mount --bind would've taken. The detached tree will be dissolved on the final close of obtained file. Creation of such detached trees requires the same capabilities as doing mount --bind. Signed-off-by: Al Viro Signed-off-by: David Howells cc: linux-api@vger.kernel.org Signed-off-by: Al Viro --- arch/x86/entry/syscalls/syscall_32.tbl | 3 ++- arch/x86/entry/syscalls/syscall_64.tbl | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 1f9607ed087c..ae2294d07ecb 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -398,7 +398,8 @@ 384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl 385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents 386 i386 rseq sys_rseq __ia32_sys_rseq -# don't use numbers 387 through 392, add new calls at the end +387 i386 open_tree sys_open_tree __ia32_sys_open_tree +# don't use numbers 388 through 392, add new calls at the end 393 i386 semget sys_semget __ia32_sys_semget 394 i386 semctl sys_semctl __ia32_compat_sys_semctl 395 i386 shmget sys_shmget __ia32_sys_shmget diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 92ee0b4378d4..a6e06c35b5b1 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -343,6 +343,7 @@ 332 common statx __x64_sys_statx 333 common io_pgetevents __x64_sys_io_pgetevents 334 common rseq __x64_sys_rseq +335 common open_tree __x64_sys_open_tree # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal __x64_sys_pidfd_send_signal -- cgit From 2db154b3ea8e14b04fee23e3fdfd5e9d17fbc6ae Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 5 Nov 2018 17:40:30 +0000 Subject: vfs: syscall: Add move_mount(2) to move mounts around Add a move_mount() system call that will move a mount from one place to another and, in the next commit, allow to attach an unattached mount tree. The new system call looks like the following: int move_mount(int from_dfd, const char *from_path, int to_dfd, const char *to_path, unsigned int flags); Signed-off-by: David Howells cc: linux-api@vger.kernel.org Signed-off-by: Al Viro --- arch/x86/entry/syscalls/syscall_32.tbl | 3 ++- arch/x86/entry/syscalls/syscall_64.tbl | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index ae2294d07ecb..0db9effb18d9 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -399,7 +399,8 @@ 385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents 386 i386 rseq sys_rseq __ia32_sys_rseq 387 i386 open_tree sys_open_tree __ia32_sys_open_tree -# don't use numbers 388 through 392, add new calls at the end +388 i386 move_mount sys_move_mount __ia32_sys_move_mount +# don't use numbers 389 through 392, add new calls at the end 393 i386 semget sys_semget __ia32_sys_semget 394 i386 semctl sys_semctl __ia32_compat_sys_semctl 395 i386 shmget sys_shmget __ia32_sys_shmget diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index a6e06c35b5b1..0440f0eefa02 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -344,6 +344,7 @@ 333 common io_pgetevents __x64_sys_io_pgetevents 334 common rseq __x64_sys_rseq 335 common open_tree __x64_sys_open_tree +336 common move_mount __x64_sys_move_mount # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal __x64_sys_pidfd_send_signal -- cgit From dadd2299ab61fc2b55b95b7b3a8f674cdd3b69c9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 5 Nov 2018 17:40:31 +0000 Subject: Make anon_inodes unconditional Make the anon_inodes facility unconditional so that it can be used by core VFS code. Signed-off-by: David Howells Signed-off-by: Al Viro --- arch/x86/Kconfig | 1 - arch/x86/kvm/Kconfig | 1 - 2 files changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c1f9b3cf437c..18f2c954464e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -44,7 +44,6 @@ config X86 # select ACPI_LEGACY_TABLES_LOOKUP if ACPI select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI - select ANON_INODES select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_DATA select ARCH_CLOCKSOURCE_INIT diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 72fa955f4a15..fc042419e670 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -27,7 +27,6 @@ config KVM depends on X86_LOCAL_APIC select PREEMPT_NOTIFIERS select MMU_NOTIFIER - select ANON_INODES select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD select IRQ_BYPASS_MANAGER -- cgit From 24dcb3d90a1f67fe08c68a004af37df059d74005 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:33:31 +0000 Subject: vfs: syscall: Add fsopen() to prepare for superblock creation Provide an fsopen() system call that starts the process of preparing to create a superblock that will then be mountable, using an fd as a context handle. fsopen() is given the name of the filesystem that will be used: int mfd = fsopen(const char *fsname, unsigned int flags); where flags can be 0 or FSOPEN_CLOEXEC. For example: sfd = fsopen("ext4", FSOPEN_CLOEXEC); fsconfig(sfd, FSCONFIG_SET_PATH, "source", "/dev/sda1", AT_FDCWD); fsconfig(sfd, FSCONFIG_SET_FLAG, "noatime", NULL, 0); fsconfig(sfd, FSCONFIG_SET_FLAG, "acl", NULL, 0); fsconfig(sfd, FSCONFIG_SET_FLAG, "user_xattr", NULL, 0); fsconfig(sfd, FSCONFIG_SET_STRING, "sb", "1", 0); fsconfig(sfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); fsinfo(sfd, NULL, ...); // query new superblock attributes mfd = fsmount(sfd, FSMOUNT_CLOEXEC, MS_RELATIME); move_mount(mfd, "", sfd, AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH); sfd = fsopen("afs", -1); fsconfig(fd, FSCONFIG_SET_STRING, "source", "#grand.central.org:root.cell", 0); fsconfig(fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); mfd = fsmount(sfd, 0, MS_NODEV); move_mount(mfd, "", sfd, AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH); If an error is reported at any step, an error message may be available to be read() back (ENODATA will be reported if there isn't an error available) in the form: "e :" "e SELinux:Mount on mountpoint not permitted" Once fsmount() has been called, further fsconfig() calls will incur EBUSY, even if the fsmount() fails. read() is still possible to retrieve error information. The fsopen() syscall creates a mount context and hangs it of the fd that it returns. Netlink is not used because it is optional and would make the core VFS dependent on the networking layer and also potentially add network namespace issues. Note that, for the moment, the caller must have SYS_CAP_ADMIN to use fsopen(). Signed-off-by: David Howells cc: linux-api@vger.kernel.org Signed-off-by: Al Viro --- arch/x86/entry/syscalls/syscall_32.tbl | 3 ++- arch/x86/entry/syscalls/syscall_64.tbl | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 0db9effb18d9..37fd1fc5396e 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -400,7 +400,8 @@ 386 i386 rseq sys_rseq __ia32_sys_rseq 387 i386 open_tree sys_open_tree __ia32_sys_open_tree 388 i386 move_mount sys_move_mount __ia32_sys_move_mount -# don't use numbers 389 through 392, add new calls at the end +389 i386 fsopen sys_fsopen __ia32_sys_fsopen +# don't use numbers 390 through 392, add new calls at the end 393 i386 semget sys_semget __ia32_sys_semget 394 i386 semctl sys_semctl __ia32_compat_sys_semctl 395 i386 shmget sys_shmget __ia32_sys_shmget diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 0440f0eefa02..511608a21611 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -345,6 +345,7 @@ 334 common rseq __x64_sys_rseq 335 common open_tree __x64_sys_open_tree 336 common move_mount __x64_sys_move_mount +337 common fsopen __x64_sys_fsopen # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal __x64_sys_pidfd_send_signal -- cgit From ecdab150fddb42fe6a739335257949220033b782 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:36:09 +0000 Subject: vfs: syscall: Add fsconfig() for configuring and managing a context Add a syscall for configuring a filesystem creation context and triggering actions upon it, to be used in conjunction with fsopen, fspick and fsmount. long fsconfig(int fs_fd, unsigned int cmd, const char *key, const void *value, int aux); Where fs_fd indicates the context, cmd indicates the action to take, key indicates the parameter name for parameter-setting actions and, if needed, value points to a buffer containing the value and aux can give more information for the value. The following command IDs are proposed: (*) FSCONFIG_SET_FLAG: No value is specified. The parameter must be boolean in nature. The key may be prefixed with "no" to invert the setting. value must be NULL and aux must be 0. (*) FSCONFIG_SET_STRING: A string value is specified. The parameter can be expecting boolean, integer, string or take a path. A conversion to an appropriate type will be attempted (which may include looking up as a path). value points to a NUL-terminated string and aux must be 0. (*) FSCONFIG_SET_BINARY: A binary blob is specified. value points to the blob and aux indicates its size. The parameter must be expecting a blob. (*) FSCONFIG_SET_PATH: A non-empty path is specified. The parameter must be expecting a path object. value points to a NUL-terminated string that is the path and aux is a file descriptor at which to start a relative lookup or AT_FDCWD. (*) FSCONFIG_SET_PATH_EMPTY: As fsconfig_set_path, but with AT_EMPTY_PATH implied. (*) FSCONFIG_SET_FD: An open file descriptor is specified. value must be NULL and aux indicates the file descriptor. (*) FSCONFIG_CMD_CREATE: Trigger superblock creation. (*) FSCONFIG_CMD_RECONFIGURE: Trigger superblock reconfiguration. For the "set" command IDs, the idea is that the file_system_type will point to a list of parameters and the types of value that those parameters expect to take. The core code can then do the parse and argument conversion and then give the LSM and FS a cooked option or array of options to use. Source specification is also done the same way same way, using special keys "source", "source1", "source2", etc.. [!] Note that, for the moment, the key and value are just glued back together and handed to the filesystem. Every filesystem that uses options uses match_token() and co. to do this, and this will need to be changed - but not all at once. Example usage: fd = fsopen("ext4", FSOPEN_CLOEXEC); fsconfig(fd, fsconfig_set_path, "source", "/dev/sda1", AT_FDCWD); fsconfig(fd, fsconfig_set_path_empty, "journal_path", "", journal_fd); fsconfig(fd, fsconfig_set_fd, "journal_fd", "", journal_fd); fsconfig(fd, fsconfig_set_flag, "user_xattr", NULL, 0); fsconfig(fd, fsconfig_set_flag, "noacl", NULL, 0); fsconfig(fd, fsconfig_set_string, "sb", "1", 0); fsconfig(fd, fsconfig_set_string, "errors", "continue", 0); fsconfig(fd, fsconfig_set_string, "data", "journal", 0); fsconfig(fd, fsconfig_set_string, "context", "unconfined_u:...", 0); fsconfig(fd, fsconfig_cmd_create, NULL, NULL, 0); mfd = fsmount(fd, FSMOUNT_CLOEXEC, MS_NOEXEC); or: fd = fsopen("ext4", FSOPEN_CLOEXEC); fsconfig(fd, fsconfig_set_string, "source", "/dev/sda1", 0); fsconfig(fd, fsconfig_cmd_create, NULL, NULL, 0); mfd = fsmount(fd, FSMOUNT_CLOEXEC, MS_NOEXEC); or: fd = fsopen("afs", FSOPEN_CLOEXEC); fsconfig(fd, fsconfig_set_string, "source", "#grand.central.org:root.cell", 0); fsconfig(fd, fsconfig_cmd_create, NULL, NULL, 0); mfd = fsmount(fd, FSMOUNT_CLOEXEC, MS_NOEXEC); or: fd = fsopen("jffs2", FSOPEN_CLOEXEC); fsconfig(fd, fsconfig_set_string, "source", "mtd0", 0); fsconfig(fd, fsconfig_cmd_create, NULL, NULL, 0); mfd = fsmount(fd, FSMOUNT_CLOEXEC, MS_NOEXEC); Signed-off-by: David Howells cc: linux-api@vger.kernel.org Signed-off-by: Al Viro --- arch/x86/entry/syscalls/syscall_32.tbl | 3 ++- arch/x86/entry/syscalls/syscall_64.tbl | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 37fd1fc5396e..786728143205 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -401,7 +401,8 @@ 387 i386 open_tree sys_open_tree __ia32_sys_open_tree 388 i386 move_mount sys_move_mount __ia32_sys_move_mount 389 i386 fsopen sys_fsopen __ia32_sys_fsopen -# don't use numbers 390 through 392, add new calls at the end +390 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig +# don't use numbers 391 through 392, add new calls at the end 393 i386 semget sys_semget __ia32_sys_semget 394 i386 semctl sys_semctl __ia32_compat_sys_semctl 395 i386 shmget sys_shmget __ia32_sys_shmget diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 511608a21611..7039a809d37d 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -346,6 +346,7 @@ 335 common open_tree __x64_sys_open_tree 336 common move_mount __x64_sys_move_mount 337 common fsopen __x64_sys_fsopen +338 common fsconfig __x64_sys_fsconfig # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal __x64_sys_pidfd_send_signal -- cgit From 93766fbd2696c2c4453dd8e1070977e9cd4e6b6d Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:36:14 +0000 Subject: vfs: syscall: Add fsmount() to create a mount for a superblock Provide a system call by which a filesystem opened with fsopen() and configured by a series of fsconfig() calls can have a detached mount object created for it. This mount object can then be attached to the VFS mount hierarchy using move_mount() by passing the returned file descriptor as the from directory fd. The system call looks like: int mfd = fsmount(int fsfd, unsigned int flags, unsigned int attr_flags); where fsfd is the file descriptor returned by fsopen(). flags can be 0 or FSMOUNT_CLOEXEC. attr_flags is a bitwise-OR of the following flags: MOUNT_ATTR_RDONLY Mount read-only MOUNT_ATTR_NOSUID Ignore suid and sgid bits MOUNT_ATTR_NODEV Disallow access to device special files MOUNT_ATTR_NOEXEC Disallow program execution MOUNT_ATTR__ATIME Setting on how atime should be updated MOUNT_ATTR_RELATIME - Update atime relative to mtime/ctime MOUNT_ATTR_NOATIME - Do not update access times MOUNT_ATTR_STRICTATIME - Always perform atime updates MOUNT_ATTR_NODIRATIME Do not update directory access times In the event that fsmount() fails, it may be possible to get an error message by calling read() on fsfd. If no message is available, ENODATA will be reported. Signed-off-by: David Howells cc: linux-api@vger.kernel.org Signed-off-by: Al Viro --- arch/x86/entry/syscalls/syscall_32.tbl | 3 ++- arch/x86/entry/syscalls/syscall_64.tbl | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 786728143205..5b5c9189c507 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -402,7 +402,8 @@ 388 i386 move_mount sys_move_mount __ia32_sys_move_mount 389 i386 fsopen sys_fsopen __ia32_sys_fsopen 390 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig -# don't use numbers 391 through 392, add new calls at the end +391 i386 fsmount sys_fsmount __ia32_sys_fsmount +# don't use number 392, add new calls at the end 393 i386 semget sys_semget __ia32_sys_semget 394 i386 semctl sys_semctl __ia32_compat_sys_semctl 395 i386 shmget sys_shmget __ia32_sys_shmget diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 7039a809d37d..984ad594bb2b 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -347,6 +347,7 @@ 336 common move_mount __x64_sys_move_mount 337 common fsopen __x64_sys_fsopen 338 common fsconfig __x64_sys_fsconfig +339 common fsmount __x64_sys_fsmount # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal __x64_sys_pidfd_send_signal -- cgit From cf3cba4a429be43e5527a3f78859b1bfd9ebc5fb Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:36:23 +0000 Subject: vfs: syscall: Add fspick() to select a superblock for reconfiguration Provide an fspick() system call that can be used to pick an existing mountpoint into an fs_context which can thereafter be used to reconfigure a superblock (equivalent of the superblock side of -o remount). This looks like: int fd = fspick(AT_FDCWD, "/mnt", FSPICK_CLOEXEC | FSPICK_NO_AUTOMOUNT); fsconfig(fd, FSCONFIG_SET_FLAG, "intr", NULL, 0); fsconfig(fd, FSCONFIG_SET_FLAG, "noac", NULL, 0); fsconfig(fd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0); At the point of fspick being called, the file descriptor referring to the filesystem context is in exactly the same state as the one that was created by fsopen() after fsmount() has been successfully called. Signed-off-by: David Howells cc: linux-api@vger.kernel.org Signed-off-by: Al Viro --- arch/x86/entry/syscalls/syscall_32.tbl | 2 +- arch/x86/entry/syscalls/syscall_64.tbl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 5b5c9189c507..4cd5f982b1e5 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -403,7 +403,7 @@ 389 i386 fsopen sys_fsopen __ia32_sys_fsopen 390 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig 391 i386 fsmount sys_fsmount __ia32_sys_fsmount -# don't use number 392, add new calls at the end +392 i386 fspick sys_fspick __ia32_sys_fspick 393 i386 semget sys_semget __ia32_sys_semget 394 i386 semctl sys_semctl __ia32_compat_sys_semctl 395 i386 shmget sys_shmget __ia32_sys_shmget diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 984ad594bb2b..64ca0d06259a 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -348,6 +348,7 @@ 337 common fsopen __x64_sys_fsopen 338 common fsconfig __x64_sys_fsconfig 339 common fsmount __x64_sys_fsmount +340 common fspick __x64_sys_fspick # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal __x64_sys_pidfd_send_signal -- cgit From 16add411645cff83360086e102daa67b25f1e39a Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 18 Mar 2019 02:30:18 +0300 Subject: syscall_get_arch: add "struct task_struct *" argument This argument is required to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request: syscall_get_arch() is going to be called from ptrace_request() along with syscall_get_nr(), syscall_get_arguments(), syscall_get_error(), and syscall_get_return_value() functions with a tracee as their argument. The primary intent is that the triple (audit_arch, syscall_nr, arg1..arg6) should describe what system call is being called and what its arguments are. Reverts: 5e937a9ae913 ("syscall_get_arch: remove useless function arguments") Reverts: 1002d94d3076 ("syscall.h: fix doc text for syscall_get_arch()") Reviewed-by: Andy Lutomirski # for x86 Reviewed-by: Palmer Dabbelt Acked-by: Paul Moore Acked-by: Paul Burton # MIPS parts Acked-by: Michael Ellerman (powerpc) Acked-by: Kees Cook # seccomp parts Acked-by: Mark Salter # for the c6x bit Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Oleg Nesterov Cc: x86@kernel.org Cc: linux-alpha@vger.kernel.org Cc: linux-snps-arc@lists.infradead.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: uclinux-h8-devel@lists.sourceforge.jp Cc: linux-hexagon@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-m68k@lists.linux-m68k.org Cc: linux-mips@vger.kernel.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-riscv@lists.infradead.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linux-arch@vger.kernel.org Cc: linux-audit@redhat.com Signed-off-by: Dmitry V. Levin Signed-off-by: Paul Moore --- arch/x86/include/asm/syscall.h | 8 +++++--- arch/x86/um/asm/syscall.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index d653139857af..435f3f09279c 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -107,7 +107,7 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(®s->bx + i, args, n * sizeof(args[0])); } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_I386; } @@ -236,10 +236,12 @@ static inline void syscall_set_arguments(struct task_struct *task, } } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { /* x32 tasks should be considered AUDIT_ARCH_X86_64. */ - return in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; + return (IS_ENABLED(CONFIG_IA32_EMULATION) && + task->thread_info.status & TS_COMPAT) + ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; } #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/um/asm/syscall.h b/arch/x86/um/asm/syscall.h index ef898af102d1..56a2f0913e3c 100644 --- a/arch/x86/um/asm/syscall.h +++ b/arch/x86/um/asm/syscall.h @@ -9,7 +9,7 @@ typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { #ifdef CONFIG_X86_32 return AUDIT_ARCH_I386; -- cgit From 8b56d3488d87555c77494b1fb90dbea84ff0e9fc Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Mar 2019 12:00:51 -0700 Subject: crypto: x86/aesni - convert to use skcipher SIMD bulk registration Convert the AES-NI glue code to use simd_register_skciphers_compat() to create SIMD wrappers for all the internal skcipher algorithms at once, rather than wrapping each one individually. This simplifies the code. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 43 +++++++------------------------------- 1 file changed, 7 insertions(+), 36 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 1e3d2102033a..451918cc5f73 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1253,23 +1253,9 @@ static const struct x86_cpu_id aesni_cpu_id[] = { }; MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); -static void aesni_free_simds(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) && - aesni_simd_skciphers[i]; i++) - simd_skcipher_free(aesni_simd_skciphers[i]); -} - static int __init aesni_init(void) { - struct simd_skcipher_alg *simd; - const char *basename; - const char *algname; - const char *drvname; int err; - int i; if (!x86_match_cpu(aesni_cpu_id)) return -ENODEV; @@ -1304,8 +1290,9 @@ static int __init aesni_init(void) if (err) return err; - err = crypto_register_skciphers(aesni_skciphers, - ARRAY_SIZE(aesni_skciphers)); + err = simd_register_skciphers_compat(aesni_skciphers, + ARRAY_SIZE(aesni_skciphers), + aesni_simd_skciphers); if (err) goto unregister_algs; @@ -1314,26 +1301,11 @@ static int __init aesni_init(void) if (err) goto unregister_skciphers; - for (i = 0; i < ARRAY_SIZE(aesni_skciphers); i++) { - algname = aesni_skciphers[i].base.cra_name + 2; - drvname = aesni_skciphers[i].base.cra_driver_name + 2; - basename = aesni_skciphers[i].base.cra_driver_name; - simd = simd_skcipher_create_compat(algname, drvname, basename); - err = PTR_ERR(simd); - if (IS_ERR(simd)) - goto unregister_simds; - - aesni_simd_skciphers[i] = simd; - } - return 0; -unregister_simds: - aesni_free_simds(); - crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs)); unregister_skciphers: - crypto_unregister_skciphers(aesni_skciphers, - ARRAY_SIZE(aesni_skciphers)); + simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers), + aesni_simd_skciphers); unregister_algs: crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); return err; @@ -1341,10 +1313,9 @@ unregister_algs: static void __exit aesni_exit(void) { - aesni_free_simds(); crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs)); - crypto_unregister_skciphers(aesni_skciphers, - ARRAY_SIZE(aesni_skciphers)); + simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers), + aesni_simd_skciphers); crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); } -- cgit From 149e12252fb38937d3efa4419be1a87884ab1427 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Mar 2019 12:00:52 -0700 Subject: crypto: x86/aesni - convert to use AEAD SIMD helpers Convert the AES-NI implementations of "gcm(aes)" and "rfc4106(gcm(aes))" to use the AEAD SIMD helpers, rather than hand-rolling the same functionality. This simplifies the code and also fixes the bug where the user-provided aead_request is modified. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 161 ++++--------------------------------- 1 file changed, 15 insertions(+), 146 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 451918cc5f73..1466d59384ac 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -643,29 +642,6 @@ static int xts_decrypt(struct skcipher_request *req) aes_ctx(ctx->raw_crypt_ctx)); } -static int rfc4106_init(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", - CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - return 0; -} - -static void rfc4106_exit(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} - static int rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) { @@ -710,15 +686,8 @@ static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key, rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); } -static int gcmaes_wrapper_set_key(struct crypto_aead *parent, const u8 *key, - unsigned int key_len) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(parent); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setkey(&cryptd_tfm->base, key, key_len); -} - +/* This is the Integrity Check Value (aka the authentication tag) length and can + * be 8, 12 or 16 bytes long. */ static int common_rfc4106_set_authsize(struct crypto_aead *aead, unsigned int authsize) { @@ -734,17 +703,6 @@ static int common_rfc4106_set_authsize(struct crypto_aead *aead, return 0; } -/* This is the Integrity Check Value (aka the authentication tag length and can - * be 8, 12 or 16 bytes long. */ -static int gcmaes_wrapper_set_authsize(struct crypto_aead *parent, - unsigned int authsize) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(parent); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); -} - static int generic_gcmaes_set_authsize(struct crypto_aead *tfm, unsigned int authsize) { @@ -964,38 +922,6 @@ static int helper_rfc4106_decrypt(struct aead_request *req) return gcmaes_decrypt(req, req->assoclen - 8, ctx->hash_subkey, iv, aes_ctx); } - -static int gcmaes_wrapper_encrypt(struct aead_request *req) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(tfm); - struct cryptd_aead *cryptd_tfm = *ctx; - - tfm = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - tfm = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, tfm); - - return crypto_aead_encrypt(req); -} - -static int gcmaes_wrapper_decrypt(struct aead_request *req) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(tfm); - struct cryptd_aead *cryptd_tfm = *ctx; - - tfm = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - tfm = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, tfm); - - return crypto_aead_decrypt(req); -} #endif static struct crypto_alg aesni_algs[] = { { @@ -1148,31 +1074,7 @@ static int generic_gcmaes_decrypt(struct aead_request *req) aes_ctx); } -static int generic_gcmaes_init(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_tfm = cryptd_alloc_aead("__driver-generic-gcm-aes-aesni", - CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - - return 0; -} - -static void generic_gcmaes_exit(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} - -static struct aead_alg aesni_aead_algs[] = { { +static struct aead_alg aesni_aeads[] = { { .setkey = common_rfc4106_set_key, .setauthsize = common_rfc4106_set_authsize, .encrypt = helper_rfc4106_encrypt, @@ -1180,32 +1082,15 @@ static struct aead_alg aesni_aead_algs[] = { { .ivsize = GCM_RFC4106_IV_SIZE, .maxauthsize = 16, .base = { - .cra_name = "__gcm-aes-aesni", - .cra_driver_name = "__driver-gcm-aes-aesni", + .cra_name = "__rfc4106(gcm(aes))", + .cra_driver_name = "__rfc4106-gcm-aesni", + .cra_priority = 400, .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx), .cra_alignmask = AESNI_ALIGN - 1, .cra_module = THIS_MODULE, }, -}, { - .init = rfc4106_init, - .exit = rfc4106_exit, - .setkey = gcmaes_wrapper_set_key, - .setauthsize = gcmaes_wrapper_set_authsize, - .encrypt = gcmaes_wrapper_encrypt, - .decrypt = gcmaes_wrapper_decrypt, - .ivsize = GCM_RFC4106_IV_SIZE, - .maxauthsize = 16, - .base = { - .cra_name = "rfc4106(gcm(aes))", - .cra_driver_name = "rfc4106-gcm-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cryptd_aead *), - .cra_module = THIS_MODULE, - }, }, { .setkey = generic_gcmaes_set_key, .setauthsize = generic_gcmaes_set_authsize, @@ -1214,38 +1099,21 @@ static struct aead_alg aesni_aead_algs[] = { { .ivsize = GCM_AES_IV_SIZE, .maxauthsize = 16, .base = { - .cra_name = "__generic-gcm-aes-aesni", - .cra_driver_name = "__driver-generic-gcm-aes-aesni", - .cra_priority = 0, + .cra_name = "__gcm(aes)", + .cra_driver_name = "__generic-gcm-aesni", + .cra_priority = 400, .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct generic_gcmaes_ctx), .cra_alignmask = AESNI_ALIGN - 1, .cra_module = THIS_MODULE, }, -}, { - .init = generic_gcmaes_init, - .exit = generic_gcmaes_exit, - .setkey = gcmaes_wrapper_set_key, - .setauthsize = gcmaes_wrapper_set_authsize, - .encrypt = gcmaes_wrapper_encrypt, - .decrypt = gcmaes_wrapper_decrypt, - .ivsize = GCM_AES_IV_SIZE, - .maxauthsize = 16, - .base = { - .cra_name = "gcm(aes)", - .cra_driver_name = "generic-gcm-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cryptd_aead *), - .cra_module = THIS_MODULE, - }, } }; #else -static struct aead_alg aesni_aead_algs[0]; +static struct aead_alg aesni_aeads[0]; #endif +static struct simd_aead_alg *aesni_simd_aeads[ARRAY_SIZE(aesni_aeads)]; static const struct x86_cpu_id aesni_cpu_id[] = { X86_FEATURE_MATCH(X86_FEATURE_AES), @@ -1296,8 +1164,8 @@ static int __init aesni_init(void) if (err) goto unregister_algs; - err = crypto_register_aeads(aesni_aead_algs, - ARRAY_SIZE(aesni_aead_algs)); + err = simd_register_aeads_compat(aesni_aeads, ARRAY_SIZE(aesni_aeads), + aesni_simd_aeads); if (err) goto unregister_skciphers; @@ -1313,7 +1181,8 @@ unregister_algs: static void __exit aesni_exit(void) { - crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs)); + simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads), + aesni_simd_aeads); simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers), aesni_simd_skciphers); crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); -- cgit From de272ca72c6152e26b9799d21eb511aac03b6e2d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Mar 2019 12:00:53 -0700 Subject: crypto: x86/aegis128 - convert to use AEAD SIMD helpers Convert the x86 implementation of AEGIS-128 to use the AEAD SIMD helpers, rather than hand-rolling the same functionality. This simplifies the code and also fixes the bug where the user-provided aead_request is modified. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aegis128-aesni-glue.c | 157 +++++++--------------------------- 1 file changed, 30 insertions(+), 127 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c index 3ea71b871813..bdeee1b830be 100644 --- a/arch/x86/crypto/aegis128-aesni-glue.c +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -11,8 +11,8 @@ * any later version. */ -#include #include +#include #include #include #include @@ -242,131 +242,35 @@ static void crypto_aegis128_aesni_exit_tfm(struct crypto_aead *aead) { } -static int cryptd_aegis128_aesni_setkey(struct crypto_aead *aead, - const u8 *key, unsigned int keylen) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); -} - -static int cryptd_aegis128_aesni_setauthsize(struct crypto_aead *aead, - unsigned int authsize) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); -} - -static int cryptd_aegis128_aesni_encrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_encrypt(req); -} - -static int cryptd_aegis128_aesni_decrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_decrypt(req); -} - -static int cryptd_aegis128_aesni_init_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_tfm = cryptd_alloc_aead("__aegis128-aesni", CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - return 0; -} - -static void cryptd_aegis128_aesni_exit_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} - -static struct aead_alg crypto_aegis128_aesni_alg[] = { - { - .setkey = crypto_aegis128_aesni_setkey, - .setauthsize = crypto_aegis128_aesni_setauthsize, - .encrypt = crypto_aegis128_aesni_encrypt, - .decrypt = crypto_aegis128_aesni_decrypt, - .init = crypto_aegis128_aesni_init_tfm, - .exit = crypto_aegis128_aesni_exit_tfm, - - .ivsize = AEGIS128_NONCE_SIZE, - .maxauthsize = AEGIS128_MAX_AUTH_SIZE, - .chunksize = AEGIS128_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aegis_ctx) + - __alignof__(struct aegis_ctx), - .cra_alignmask = 0, - - .cra_name = "__aegis128", - .cra_driver_name = "__aegis128-aesni", - - .cra_module = THIS_MODULE, - } - }, { - .setkey = cryptd_aegis128_aesni_setkey, - .setauthsize = cryptd_aegis128_aesni_setauthsize, - .encrypt = cryptd_aegis128_aesni_encrypt, - .decrypt = cryptd_aegis128_aesni_decrypt, - .init = cryptd_aegis128_aesni_init_tfm, - .exit = cryptd_aegis128_aesni_exit_tfm, - - .ivsize = AEGIS128_NONCE_SIZE, - .maxauthsize = AEGIS128_MAX_AUTH_SIZE, - .chunksize = AEGIS128_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cryptd_aead *), - .cra_alignmask = 0, - - .cra_priority = 400, - - .cra_name = "aegis128", - .cra_driver_name = "aegis128-aesni", - - .cra_module = THIS_MODULE, - } +static struct aead_alg crypto_aegis128_aesni_alg = { + .setkey = crypto_aegis128_aesni_setkey, + .setauthsize = crypto_aegis128_aesni_setauthsize, + .encrypt = crypto_aegis128_aesni_encrypt, + .decrypt = crypto_aegis128_aesni_decrypt, + .init = crypto_aegis128_aesni_init_tfm, + .exit = crypto_aegis128_aesni_exit_tfm, + + .ivsize = AEGIS128_NONCE_SIZE, + .maxauthsize = AEGIS128_MAX_AUTH_SIZE, + .chunksize = AEGIS128_BLOCK_SIZE, + + .base = { + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aegis_ctx) + + __alignof__(struct aegis_ctx), + .cra_alignmask = 0, + .cra_priority = 400, + + .cra_name = "__aegis128", + .cra_driver_name = "__aegis128-aesni", + + .cra_module = THIS_MODULE, } }; +static struct simd_aead_alg *simd_alg; + static int __init crypto_aegis128_aesni_module_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2) || @@ -374,14 +278,13 @@ static int __init crypto_aegis128_aesni_module_init(void) !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; - return crypto_register_aeads(crypto_aegis128_aesni_alg, - ARRAY_SIZE(crypto_aegis128_aesni_alg)); + return simd_register_aeads_compat(&crypto_aegis128_aesni_alg, 1, + &simd_alg); } static void __exit crypto_aegis128_aesni_module_exit(void) { - crypto_unregister_aeads(crypto_aegis128_aesni_alg, - ARRAY_SIZE(crypto_aegis128_aesni_alg)); + simd_unregister_aeads(&crypto_aegis128_aesni_alg, 1, &simd_alg); } module_init(crypto_aegis128_aesni_module_init); -- cgit From d628132a5e3d0183ac59a3ebbd3a9dff8b20ac7e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Mar 2019 12:00:54 -0700 Subject: crypto: x86/aegis128l - convert to use AEAD SIMD helpers Convert the x86 implementation of AEGIS-128L to use the AEAD SIMD helpers, rather than hand-rolling the same functionality. This simplifies the code and also fixes the bug where the user-provided aead_request is modified. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aegis128l-aesni-glue.c | 157 +++++++-------------------------- 1 file changed, 30 insertions(+), 127 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c index 1b1b39c66c5e..80d917f7e467 100644 --- a/arch/x86/crypto/aegis128l-aesni-glue.c +++ b/arch/x86/crypto/aegis128l-aesni-glue.c @@ -11,8 +11,8 @@ * any later version. */ -#include #include +#include #include #include #include @@ -242,131 +242,35 @@ static void crypto_aegis128l_aesni_exit_tfm(struct crypto_aead *aead) { } -static int cryptd_aegis128l_aesni_setkey(struct crypto_aead *aead, - const u8 *key, unsigned int keylen) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); -} - -static int cryptd_aegis128l_aesni_setauthsize(struct crypto_aead *aead, - unsigned int authsize) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); -} - -static int cryptd_aegis128l_aesni_encrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_encrypt(req); -} - -static int cryptd_aegis128l_aesni_decrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_decrypt(req); -} - -static int cryptd_aegis128l_aesni_init_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_tfm = cryptd_alloc_aead("__aegis128l-aesni", CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - return 0; -} - -static void cryptd_aegis128l_aesni_exit_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} - -static struct aead_alg crypto_aegis128l_aesni_alg[] = { - { - .setkey = crypto_aegis128l_aesni_setkey, - .setauthsize = crypto_aegis128l_aesni_setauthsize, - .encrypt = crypto_aegis128l_aesni_encrypt, - .decrypt = crypto_aegis128l_aesni_decrypt, - .init = crypto_aegis128l_aesni_init_tfm, - .exit = crypto_aegis128l_aesni_exit_tfm, - - .ivsize = AEGIS128L_NONCE_SIZE, - .maxauthsize = AEGIS128L_MAX_AUTH_SIZE, - .chunksize = AEGIS128L_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aegis_ctx) + - __alignof__(struct aegis_ctx), - .cra_alignmask = 0, - - .cra_name = "__aegis128l", - .cra_driver_name = "__aegis128l-aesni", - - .cra_module = THIS_MODULE, - } - }, { - .setkey = cryptd_aegis128l_aesni_setkey, - .setauthsize = cryptd_aegis128l_aesni_setauthsize, - .encrypt = cryptd_aegis128l_aesni_encrypt, - .decrypt = cryptd_aegis128l_aesni_decrypt, - .init = cryptd_aegis128l_aesni_init_tfm, - .exit = cryptd_aegis128l_aesni_exit_tfm, - - .ivsize = AEGIS128L_NONCE_SIZE, - .maxauthsize = AEGIS128L_MAX_AUTH_SIZE, - .chunksize = AEGIS128L_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cryptd_aead *), - .cra_alignmask = 0, - - .cra_priority = 400, - - .cra_name = "aegis128l", - .cra_driver_name = "aegis128l-aesni", - - .cra_module = THIS_MODULE, - } +static struct aead_alg crypto_aegis128l_aesni_alg = { + .setkey = crypto_aegis128l_aesni_setkey, + .setauthsize = crypto_aegis128l_aesni_setauthsize, + .encrypt = crypto_aegis128l_aesni_encrypt, + .decrypt = crypto_aegis128l_aesni_decrypt, + .init = crypto_aegis128l_aesni_init_tfm, + .exit = crypto_aegis128l_aesni_exit_tfm, + + .ivsize = AEGIS128L_NONCE_SIZE, + .maxauthsize = AEGIS128L_MAX_AUTH_SIZE, + .chunksize = AEGIS128L_BLOCK_SIZE, + + .base = { + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aegis_ctx) + + __alignof__(struct aegis_ctx), + .cra_alignmask = 0, + .cra_priority = 400, + + .cra_name = "__aegis128l", + .cra_driver_name = "__aegis128l-aesni", + + .cra_module = THIS_MODULE, } }; +static struct simd_aead_alg *simd_alg; + static int __init crypto_aegis128l_aesni_module_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2) || @@ -374,14 +278,13 @@ static int __init crypto_aegis128l_aesni_module_init(void) !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; - return crypto_register_aeads(crypto_aegis128l_aesni_alg, - ARRAY_SIZE(crypto_aegis128l_aesni_alg)); + return simd_register_aeads_compat(&crypto_aegis128l_aesni_alg, 1, + &simd_alg); } static void __exit crypto_aegis128l_aesni_module_exit(void) { - crypto_unregister_aeads(crypto_aegis128l_aesni_alg, - ARRAY_SIZE(crypto_aegis128l_aesni_alg)); + simd_unregister_aeads(&crypto_aegis128l_aesni_alg, 1, &simd_alg); } module_init(crypto_aegis128l_aesni_module_init); -- cgit From b6708c2d8fbdeecfaf8380329d4e136f68deef05 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Mar 2019 12:00:55 -0700 Subject: crypto: x86/aegis256 - convert to use AEAD SIMD helpers Convert the x86 implementation of AEGIS-256 to use the AEAD SIMD helpers, rather than hand-rolling the same functionality. This simplifies the code and also fixes the bug where the user-provided aead_request is modified. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aegis256-aesni-glue.c | 157 +++++++--------------------------- 1 file changed, 30 insertions(+), 127 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c index 6227ca3220a0..716eecb66bd5 100644 --- a/arch/x86/crypto/aegis256-aesni-glue.c +++ b/arch/x86/crypto/aegis256-aesni-glue.c @@ -11,8 +11,8 @@ * any later version. */ -#include #include +#include #include #include #include @@ -242,131 +242,35 @@ static void crypto_aegis256_aesni_exit_tfm(struct crypto_aead *aead) { } -static int cryptd_aegis256_aesni_setkey(struct crypto_aead *aead, - const u8 *key, unsigned int keylen) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); -} - -static int cryptd_aegis256_aesni_setauthsize(struct crypto_aead *aead, - unsigned int authsize) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); -} - -static int cryptd_aegis256_aesni_encrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_encrypt(req); -} - -static int cryptd_aegis256_aesni_decrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_decrypt(req); -} - -static int cryptd_aegis256_aesni_init_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_tfm = cryptd_alloc_aead("__aegis256-aesni", CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - return 0; -} - -static void cryptd_aegis256_aesni_exit_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} - -static struct aead_alg crypto_aegis256_aesni_alg[] = { - { - .setkey = crypto_aegis256_aesni_setkey, - .setauthsize = crypto_aegis256_aesni_setauthsize, - .encrypt = crypto_aegis256_aesni_encrypt, - .decrypt = crypto_aegis256_aesni_decrypt, - .init = crypto_aegis256_aesni_init_tfm, - .exit = crypto_aegis256_aesni_exit_tfm, - - .ivsize = AEGIS256_NONCE_SIZE, - .maxauthsize = AEGIS256_MAX_AUTH_SIZE, - .chunksize = AEGIS256_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aegis_ctx) + - __alignof__(struct aegis_ctx), - .cra_alignmask = 0, - - .cra_name = "__aegis256", - .cra_driver_name = "__aegis256-aesni", - - .cra_module = THIS_MODULE, - } - }, { - .setkey = cryptd_aegis256_aesni_setkey, - .setauthsize = cryptd_aegis256_aesni_setauthsize, - .encrypt = cryptd_aegis256_aesni_encrypt, - .decrypt = cryptd_aegis256_aesni_decrypt, - .init = cryptd_aegis256_aesni_init_tfm, - .exit = cryptd_aegis256_aesni_exit_tfm, - - .ivsize = AEGIS256_NONCE_SIZE, - .maxauthsize = AEGIS256_MAX_AUTH_SIZE, - .chunksize = AEGIS256_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cryptd_aead *), - .cra_alignmask = 0, - - .cra_priority = 400, - - .cra_name = "aegis256", - .cra_driver_name = "aegis256-aesni", - - .cra_module = THIS_MODULE, - } +static struct aead_alg crypto_aegis256_aesni_alg = { + .setkey = crypto_aegis256_aesni_setkey, + .setauthsize = crypto_aegis256_aesni_setauthsize, + .encrypt = crypto_aegis256_aesni_encrypt, + .decrypt = crypto_aegis256_aesni_decrypt, + .init = crypto_aegis256_aesni_init_tfm, + .exit = crypto_aegis256_aesni_exit_tfm, + + .ivsize = AEGIS256_NONCE_SIZE, + .maxauthsize = AEGIS256_MAX_AUTH_SIZE, + .chunksize = AEGIS256_BLOCK_SIZE, + + .base = { + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aegis_ctx) + + __alignof__(struct aegis_ctx), + .cra_alignmask = 0, + .cra_priority = 400, + + .cra_name = "__aegis256", + .cra_driver_name = "__aegis256-aesni", + + .cra_module = THIS_MODULE, } }; +static struct simd_aead_alg *simd_alg; + static int __init crypto_aegis256_aesni_module_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2) || @@ -374,14 +278,13 @@ static int __init crypto_aegis256_aesni_module_init(void) !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; - return crypto_register_aeads(crypto_aegis256_aesni_alg, - ARRAY_SIZE(crypto_aegis256_aesni_alg)); + return simd_register_aeads_compat(&crypto_aegis256_aesni_alg, 1, + &simd_alg); } static void __exit crypto_aegis256_aesni_module_exit(void) { - crypto_unregister_aeads(crypto_aegis256_aesni_alg, - ARRAY_SIZE(crypto_aegis256_aesni_alg)); + simd_unregister_aeads(&crypto_aegis256_aesni_alg, 1, &simd_alg); } module_init(crypto_aegis256_aesni_module_init); -- cgit From 477309580dcc5791a76302c53e50d0b8693b13bc Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Mar 2019 12:00:56 -0700 Subject: crypto: x86/morus640 - convert to use AEAD SIMD helpers Convert the x86 implementation of MORUS-640 to use the AEAD SIMD helpers, rather than hand-rolling the same functionality. This simplifies the code and also fixes the bug where the user-provided aead_request is modified. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/morus640-sse2-glue.c | 12 ++--- arch/x86/crypto/morus640_glue.c | 85 ------------------------------------ 2 files changed, 7 insertions(+), 90 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c index 9afaf8f8565a..32da56b3bdad 100644 --- a/arch/x86/crypto/morus640-sse2-glue.c +++ b/arch/x86/crypto/morus640-sse2-glue.c @@ -12,6 +12,7 @@ */ #include +#include #include #include #include @@ -35,7 +36,9 @@ asmlinkage void crypto_morus640_sse2_dec_tail(void *state, const void *src, asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor, u64 assoclen, u64 cryptlen); -MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400); +MORUS640_DECLARE_ALG(sse2, "morus640-sse2", 400); + +static struct simd_aead_alg *simd_alg; static int __init crypto_morus640_sse2_module_init(void) { @@ -43,14 +46,13 @@ static int __init crypto_morus640_sse2_module_init(void) !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; - return crypto_register_aeads(crypto_morus640_sse2_algs, - ARRAY_SIZE(crypto_morus640_sse2_algs)); + return simd_register_aeads_compat(&crypto_morus640_sse2_alg, 1, + &simd_alg); } static void __exit crypto_morus640_sse2_module_exit(void) { - crypto_unregister_aeads(crypto_morus640_sse2_algs, - ARRAY_SIZE(crypto_morus640_sse2_algs)); + simd_unregister_aeads(&crypto_morus640_sse2_alg, 1, &simd_alg); } module_init(crypto_morus640_sse2_module_init); diff --git a/arch/x86/crypto/morus640_glue.c b/arch/x86/crypto/morus640_glue.c index cb3a81732016..1dea33d84426 100644 --- a/arch/x86/crypto/morus640_glue.c +++ b/arch/x86/crypto/morus640_glue.c @@ -11,7 +11,6 @@ * any later version. */ -#include #include #include #include @@ -200,90 +199,6 @@ void crypto_morus640_glue_init_ops(struct crypto_aead *aead, } EXPORT_SYMBOL_GPL(crypto_morus640_glue_init_ops); -int cryptd_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key, - unsigned int keylen) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); -} -EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setkey); - -int cryptd_morus640_glue_setauthsize(struct crypto_aead *aead, - unsigned int authsize) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); -} -EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setauthsize); - -int cryptd_morus640_glue_encrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_encrypt(req); -} -EXPORT_SYMBOL_GPL(cryptd_morus640_glue_encrypt); - -int cryptd_morus640_glue_decrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_decrypt(req); -} -EXPORT_SYMBOL_GPL(cryptd_morus640_glue_decrypt); - -int cryptd_morus640_glue_init_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - const char *name = crypto_aead_alg(aead)->base.cra_driver_name; - char internal_name[CRYPTO_MAX_ALG_NAME]; - - if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name) - >= CRYPTO_MAX_ALG_NAME) - return -ENAMETOOLONG; - - cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - return 0; -} -EXPORT_SYMBOL_GPL(cryptd_morus640_glue_init_tfm); - -void cryptd_morus640_glue_exit_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} -EXPORT_SYMBOL_GPL(cryptd_morus640_glue_exit_tfm); - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ondrej Mosnacek "); MODULE_DESCRIPTION("MORUS-640 AEAD mode -- glue for x86 optimizations"); -- cgit From e151a8d28c2c99c8a9a9bfbe2bd612e692c33efd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Mar 2019 12:00:57 -0700 Subject: crypto: x86/morus1280 - convert to use AEAD SIMD helpers Convert the x86 implementations of MORUS-1280 to use the AEAD SIMD helpers, rather than hand-rolling the same functionality. This simplifies the code and also fixes the bug where the user-provided aead_request is modified. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/morus1280-avx2-glue.c | 12 ++--- arch/x86/crypto/morus1280-sse2-glue.c | 12 ++--- arch/x86/crypto/morus1280_glue.c | 85 ----------------------------------- 3 files changed, 14 insertions(+), 95 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/morus1280-avx2-glue.c b/arch/x86/crypto/morus1280-avx2-glue.c index 6634907d6ccd..679627a2a824 100644 --- a/arch/x86/crypto/morus1280-avx2-glue.c +++ b/arch/x86/crypto/morus1280-avx2-glue.c @@ -12,6 +12,7 @@ */ #include +#include #include #include #include @@ -35,7 +36,9 @@ asmlinkage void crypto_morus1280_avx2_dec_tail(void *state, const void *src, asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor, u64 assoclen, u64 cryptlen); -MORUS1280_DECLARE_ALGS(avx2, "morus1280-avx2", 400); +MORUS1280_DECLARE_ALG(avx2, "morus1280-avx2", 400); + +static struct simd_aead_alg *simd_alg; static int __init crypto_morus1280_avx2_module_init(void) { @@ -44,14 +47,13 @@ static int __init crypto_morus1280_avx2_module_init(void) !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) return -ENODEV; - return crypto_register_aeads(crypto_morus1280_avx2_algs, - ARRAY_SIZE(crypto_morus1280_avx2_algs)); + return simd_register_aeads_compat(&crypto_morus1280_avx2_alg, 1, + &simd_alg); } static void __exit crypto_morus1280_avx2_module_exit(void) { - crypto_unregister_aeads(crypto_morus1280_avx2_algs, - ARRAY_SIZE(crypto_morus1280_avx2_algs)); + simd_unregister_aeads(&crypto_morus1280_avx2_alg, 1, &simd_alg); } module_init(crypto_morus1280_avx2_module_init); diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c index f40244eaf14d..c35c0638d0bb 100644 --- a/arch/x86/crypto/morus1280-sse2-glue.c +++ b/arch/x86/crypto/morus1280-sse2-glue.c @@ -12,6 +12,7 @@ */ #include +#include #include #include #include @@ -35,7 +36,9 @@ asmlinkage void crypto_morus1280_sse2_dec_tail(void *state, const void *src, asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor, u64 assoclen, u64 cryptlen); -MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350); +MORUS1280_DECLARE_ALG(sse2, "morus1280-sse2", 350); + +static struct simd_aead_alg *simd_alg; static int __init crypto_morus1280_sse2_module_init(void) { @@ -43,14 +46,13 @@ static int __init crypto_morus1280_sse2_module_init(void) !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; - return crypto_register_aeads(crypto_morus1280_sse2_algs, - ARRAY_SIZE(crypto_morus1280_sse2_algs)); + return simd_register_aeads_compat(&crypto_morus1280_sse2_alg, 1, + &simd_alg); } static void __exit crypto_morus1280_sse2_module_exit(void) { - crypto_unregister_aeads(crypto_morus1280_sse2_algs, - ARRAY_SIZE(crypto_morus1280_sse2_algs)); + simd_unregister_aeads(&crypto_morus1280_sse2_alg, 1, &simd_alg); } module_init(crypto_morus1280_sse2_module_init); diff --git a/arch/x86/crypto/morus1280_glue.c b/arch/x86/crypto/morus1280_glue.c index 7e600f8bcdad..30fc1bd98ec3 100644 --- a/arch/x86/crypto/morus1280_glue.c +++ b/arch/x86/crypto/morus1280_glue.c @@ -11,7 +11,6 @@ * any later version. */ -#include #include #include #include @@ -205,90 +204,6 @@ void crypto_morus1280_glue_init_ops(struct crypto_aead *aead, } EXPORT_SYMBOL_GPL(crypto_morus1280_glue_init_ops); -int cryptd_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key, - unsigned int keylen) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); -} -EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setkey); - -int cryptd_morus1280_glue_setauthsize(struct crypto_aead *aead, - unsigned int authsize) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); -} -EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setauthsize); - -int cryptd_morus1280_glue_encrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_encrypt(req); -} -EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_encrypt); - -int cryptd_morus1280_glue_decrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_decrypt(req); -} -EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_decrypt); - -int cryptd_morus1280_glue_init_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - const char *name = crypto_aead_alg(aead)->base.cra_driver_name; - char internal_name[CRYPTO_MAX_ALG_NAME]; - - if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name) - >= CRYPTO_MAX_ALG_NAME) - return -ENAMETOOLONG; - - cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - return 0; -} -EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_init_tfm); - -void cryptd_morus1280_glue_exit_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} -EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_exit_tfm); - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ondrej Mosnacek "); MODULE_DESCRIPTION("MORUS-1280 AEAD mode -- glue for x86 optimizations"); -- cgit From f2abe0d72b21671ad19db075262411e3d4a828dd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 12 Mar 2019 22:12:48 -0700 Subject: crypto: x86 - convert to use crypto_simd_usable() Replace all calls to irq_fpu_usable() in the x86 crypto code with crypto_simd_usable(), in order to allow testing the no-SIMD code paths. Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 8 ++++---- arch/x86/crypto/chacha_glue.c | 6 +++--- arch/x86/crypto/crc32-pclmul_glue.c | 5 +++-- arch/x86/crypto/crc32c-intel_glue.c | 7 ++++--- arch/x86/crypto/crct10dif-pclmul_glue.c | 7 ++++--- arch/x86/crypto/ghash-clmulni-intel_glue.c | 9 +++++---- arch/x86/crypto/nhpoly1305-avx2-glue.c | 5 +++-- arch/x86/crypto/nhpoly1305-sse2-glue.c | 5 +++-- arch/x86/crypto/poly1305_glue.c | 4 ++-- arch/x86/crypto/sha1_ssse3_glue.c | 7 ++++--- arch/x86/crypto/sha256_ssse3_glue.c | 7 ++++--- arch/x86/crypto/sha512_ssse3_glue.c | 10 +++++----- 12 files changed, 44 insertions(+), 36 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 1466d59384ac..21c246799aa5 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -30,8 +30,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -332,7 +332,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx, return -EINVAL; } - if (!irq_fpu_usable()) + if (!crypto_simd_usable()) err = crypto_aes_expand_key(ctx, in_key, key_len); else { kernel_fpu_begin(); @@ -353,7 +353,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - if (!irq_fpu_usable()) + if (!crypto_simd_usable()) crypto_aes_encrypt_x86(ctx, dst, src); else { kernel_fpu_begin(); @@ -366,7 +366,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - if (!irq_fpu_usable()) + if (!crypto_simd_usable()) crypto_aes_decrypt_x86(ctx, dst, src); else { kernel_fpu_begin(); diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c index 45c1c4143176..4967ad620775 100644 --- a/arch/x86/crypto/chacha_glue.c +++ b/arch/x86/crypto/chacha_glue.c @@ -12,10 +12,10 @@ #include #include +#include #include #include #include -#include #include #define CHACHA_STATE_ALIGN 16 @@ -170,7 +170,7 @@ static int chacha_simd(struct skcipher_request *req) struct skcipher_walk walk; int err; - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !irq_fpu_usable()) + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) return crypto_chacha_crypt(req); err = skcipher_walk_virt(&walk, req, true); @@ -193,7 +193,7 @@ static int xchacha_simd(struct skcipher_request *req) u8 real_iv[16]; int err; - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !irq_fpu_usable()) + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) return crypto_xchacha_crypt(req); err = skcipher_walk_virt(&walk, req, true); diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c index c8d9cdacbf10..cb4ab6645106 100644 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -32,10 +32,11 @@ #include #include #include +#include #include #include -#include +#include #define CHKSUM_BLOCK_SIZE 1 #define CHKSUM_DIGEST_SIZE 4 @@ -54,7 +55,7 @@ static u32 __attribute__((pure)) unsigned int iremainder; unsigned int prealign; - if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !irq_fpu_usable()) + if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !crypto_simd_usable()) return crc32_le(crc, p, len); if ((long)p & SCALE_F_MASK) { diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index 5773e1161072..a58fe217c856 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -29,10 +29,11 @@ #include #include #include +#include #include #include -#include +#include #define CHKSUM_BLOCK_SIZE 1 #define CHKSUM_DIGEST_SIZE 4 @@ -177,7 +178,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data, * use faster PCL version if datasize is large enough to * overcome kernel fpu state save/restore overhead */ - if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) { + if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { kernel_fpu_begin(); *crcp = crc_pcl(data, len, *crcp); kernel_fpu_end(); @@ -189,7 +190,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data, static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len, u8 *out) { - if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) { + if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { kernel_fpu_begin(); *(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp)); kernel_fpu_end(); diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c index 0e785c0b2354..64f17d2d8b67 100644 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c @@ -26,12 +26,13 @@ #include #include #include +#include #include #include #include -#include #include #include +#include asmlinkage u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len); @@ -53,7 +54,7 @@ static int chksum_update(struct shash_desc *desc, const u8 *data, { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - if (length >= 16 && irq_fpu_usable()) { + if (length >= 16 && crypto_simd_usable()) { kernel_fpu_begin(); ctx->crc = crc_t10dif_pcl(ctx->crc, data, length); kernel_fpu_end(); @@ -73,7 +74,7 @@ static int chksum_final(struct shash_desc *desc, u8 *out) static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, u8 *out) { - if (len >= 16 && irq_fpu_usable()) { + if (len >= 16 && crypto_simd_usable()) { kernel_fpu_begin(); *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len); kernel_fpu_end(); diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 3582ae885ee1..4099a0ae17dd 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -19,8 +19,9 @@ #include #include #include -#include +#include #include +#include #define GHASH_BLOCK_SIZE 16 #define GHASH_DIGEST_SIZE 16 @@ -182,7 +183,7 @@ static int ghash_async_update(struct ahash_request *req) struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!irq_fpu_usable() || + if (!crypto_simd_usable() || (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); @@ -200,7 +201,7 @@ static int ghash_async_final(struct ahash_request *req) struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!irq_fpu_usable() || + if (!crypto_simd_usable() || (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); @@ -241,7 +242,7 @@ static int ghash_async_digest(struct ahash_request *req) struct ahash_request *cryptd_req = ahash_request_ctx(req); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!irq_fpu_usable() || + if (!crypto_simd_usable() || (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c index 20d815ea4b6a..f7567cbd35b6 100644 --- a/arch/x86/crypto/nhpoly1305-avx2-glue.c +++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c @@ -7,9 +7,10 @@ */ #include +#include #include #include -#include +#include asmlinkage void nh_avx2(const u32 *key, const u8 *message, size_t message_len, u8 hash[NH_HASH_BYTES]); @@ -24,7 +25,7 @@ static void _nh_avx2(const u32 *key, const u8 *message, size_t message_len, static int nhpoly1305_avx2_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { - if (srclen < 64 || !irq_fpu_usable()) + if (srclen < 64 || !crypto_simd_usable()) return crypto_nhpoly1305_update(desc, src, srclen); do { diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c index ed68d164ce14..a661ede3b5cf 100644 --- a/arch/x86/crypto/nhpoly1305-sse2-glue.c +++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c @@ -7,9 +7,10 @@ */ #include +#include #include #include -#include +#include asmlinkage void nh_sse2(const u32 *key, const u8 *message, size_t message_len, u8 hash[NH_HASH_BYTES]); @@ -24,7 +25,7 @@ static void _nh_sse2(const u32 *key, const u8 *message, size_t message_len, static int nhpoly1305_sse2_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { - if (srclen < 64 || !irq_fpu_usable()) + if (srclen < 64 || !crypto_simd_usable()) return crypto_nhpoly1305_update(desc, src, srclen); do { diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index 88cc01506c84..6eb65b237b3c 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -11,11 +11,11 @@ #include #include +#include #include #include #include #include -#include #include struct poly1305_simd_desc_ctx { @@ -126,7 +126,7 @@ static int poly1305_simd_update(struct shash_desc *desc, unsigned int bytes; /* kernel_fpu_begin/end is costly, use fallback for small updates */ - if (srclen <= 288 || !may_use_simd()) + if (srclen <= 288 || !crypto_simd_usable()) return crypto_poly1305_update(desc, src, srclen); kernel_fpu_begin(); diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 7391c7de72c7..42f177afc33a 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -22,6 +22,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include @@ -29,7 +30,7 @@ #include #include #include -#include +#include typedef void (sha1_transform_fn)(u32 *digest, const char *data, unsigned int rounds); @@ -39,7 +40,7 @@ static int sha1_update(struct shash_desc *desc, const u8 *data, { struct sha1_state *sctx = shash_desc_ctx(desc); - if (!irq_fpu_usable() || + if (!crypto_simd_usable() || (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) return crypto_sha1_update(desc, data, len); @@ -57,7 +58,7 @@ static int sha1_update(struct shash_desc *desc, const u8 *data, static int sha1_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out, sha1_transform_fn *sha1_xform) { - if (!irq_fpu_usable()) + if (!crypto_simd_usable()) return crypto_sha1_finup(desc, data, len, out); kernel_fpu_begin(); diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 773a873d2b28..73867da3cbee 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -30,6 +30,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include @@ -37,8 +38,8 @@ #include #include #include -#include #include +#include asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, u64 rounds); @@ -49,7 +50,7 @@ static int sha256_update(struct shash_desc *desc, const u8 *data, { struct sha256_state *sctx = shash_desc_ctx(desc); - if (!irq_fpu_usable() || + if (!crypto_simd_usable() || (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) return crypto_sha256_update(desc, data, len); @@ -67,7 +68,7 @@ static int sha256_update(struct shash_desc *desc, const u8 *data, static int sha256_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out, sha256_transform_fn *sha256_xform) { - if (!irq_fpu_usable()) + if (!crypto_simd_usable()) return crypto_sha256_finup(desc, data, len, out); kernel_fpu_begin(); diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index f1b811b60ba6..458356a3f124 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -28,16 +28,16 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include #include +#include #include #include #include -#include - -#include +#include asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data, u64 rounds); @@ -49,7 +49,7 @@ static int sha512_update(struct shash_desc *desc, const u8 *data, { struct sha512_state *sctx = shash_desc_ctx(desc); - if (!irq_fpu_usable() || + if (!crypto_simd_usable() || (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE) return crypto_sha512_update(desc, data, len); @@ -67,7 +67,7 @@ static int sha512_update(struct shash_desc *desc, const u8 *data, static int sha512_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out, sha512_transform_fn *sha512_xform) { - if (!irq_fpu_usable()) + if (!crypto_simd_usable()) return crypto_sha512_finup(desc, data, len, out); kernel_fpu_begin(); -- cgit From 8db5da0b8618df79eceea99672e205d4a2a6309e Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Sun, 27 Jan 2019 19:03:45 -0500 Subject: x86/ima: require signed kernel modules Have the IMA architecture specific policy require signed kernel modules on systems with secure boot mode enabled; and coordinate the different signature verification methods, so only one signature is required. Requiring appended kernel module signatures may be configured, enabled on the boot command line, or with this patch enabled in secure boot mode. This patch defines set_module_sig_enforced(). To coordinate between appended kernel module signatures and IMA signatures, only define an IMA MODULE_CHECK policy rule if CONFIG_MODULE_SIG is not enabled. A custom IMA policy may still define and require an IMA signature. Signed-off-by: Mimi Zohar Reviewed-by: Luis Chamberlain Acked-by: Jessica Yu --- arch/x86/kernel/ima_arch.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c index e47cd9390ab4..3fb9847f1cad 100644 --- a/arch/x86/kernel/ima_arch.c +++ b/arch/x86/kernel/ima_arch.c @@ -64,12 +64,19 @@ static const char * const sb_arch_rules[] = { "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig", #endif /* CONFIG_KEXEC_VERIFY_SIG */ "measure func=KEXEC_KERNEL_CHECK", +#if !IS_ENABLED(CONFIG_MODULE_SIG) + "appraise func=MODULE_CHECK appraise_type=imasig", +#endif + "measure func=MODULE_CHECK", NULL }; const char * const *arch_get_ima_policy(void) { - if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) + if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) { + if (IS_ENABLED(CONFIG_MODULE_SIG)) + set_module_sig_enforced(); return sb_arch_rules; + } return NULL; } -- cgit From 86d35d4e7625f7c056d81316da107bd3a7564fb3 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 26 Mar 2019 07:40:54 +0000 Subject: drm/i915: Split Pineview device info into desktop and mobile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows the IS_PINEVIEW_ macros to be removed and avoid duplication of device ids already defined in i915_pciids.h. !IS_MOBILE check can be used in place of existing IS_PINEVIEW_G call sites. Signed-off-by: Tvrtko Ursulin Suggested-by: Ville Syrjälä Cc: Ville Syrjälä Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190326074057.27833-2-tvrtko.ursulin@linux.intel.com --- arch/x86/kernel/early-quirks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 50d5848bf22e..f91d3ed2df62 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -525,7 +525,8 @@ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_I945G_IDS(&gen3_early_ops), INTEL_I945GM_IDS(&gen3_early_ops), INTEL_VLV_IDS(&gen6_early_ops), - INTEL_PINEVIEW_IDS(&gen3_early_ops), + INTEL_PINEVIEW_G_IDS(&gen3_early_ops), + INTEL_PINEVIEW_M_IDS(&gen3_early_ops), INTEL_I965G_IDS(&gen3_early_ops), INTEL_G33_IDS(&gen3_early_ops), INTEL_I965GM_IDS(&gen3_early_ops), -- cgit From d53fef0be4a5f2f12219c231cdb6f838fbbf680c Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 15 Mar 2019 12:19:38 -0700 Subject: x86/gpu: add ElkhartLake to gen11 early quirks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's reserve EHL stolen memory for graphics. ElkhartLake is a gen11 platform which is compatible with ICL changes. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: x86@kernel.org Cc: José Roberto de Souza Signed-off-by: Rodrigo Vivi Reviewed-by: José Roberto de Souza Acked-by: Borislav Petkov Link: https://patchwork.freedesktop.org/patch/msgid/20190315191938.22211-2-rodrigo.vivi@intel.com --- arch/x86/kernel/early-quirks.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index f91d3ed2df62..6c4f01540833 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -548,6 +548,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_GLK_IDS(&gen9_early_ops), INTEL_CNL_IDS(&gen9_early_ops), INTEL_ICL_11_IDS(&gen11_early_ops), + INTEL_EHL_IDS(&gen11_early_ops), }; struct resource intel_graphics_stolen_res __ro_after_init = DEFINE_RES_MEM(0, 0); -- cgit From d71eb0ce109a124b0fa714832823b9452f2762cf Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 2 Apr 2019 09:59:33 -0500 Subject: x86/speculation/mds: Add mds=full,nosmt cmdline option Add the mds=full,nosmt cmdline option. This is like mds=full, but with SMT disabled if the CPU is vulnerable. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Reviewed-by: Tyler Hicks Acked-by: Jiri Kosina --- arch/x86/kernel/cpu/bugs.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 373ae1dcd301..9f252082a83b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -221,6 +221,7 @@ static void x86_amd_ssb_disable(void) /* Default mitigation for L1TF-affected CPUs */ static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL; +static bool mds_nosmt __ro_after_init = false; static const char * const mds_strings[] = { [MDS_MITIGATION_OFF] = "Vulnerable", @@ -238,8 +239,13 @@ static void __init mds_select_mitigation(void) if (mds_mitigation == MDS_MITIGATION_FULL) { if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) mds_mitigation = MDS_MITIGATION_VMWERV; + static_branch_enable(&mds_user_clear); + + if (mds_nosmt && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) + cpu_smt_disable(false); } + pr_info("%s\n", mds_strings[mds_mitigation]); } @@ -255,6 +261,10 @@ static int __init mds_cmdline(char *str) mds_mitigation = MDS_MITIGATION_OFF; else if (!strcmp(str, "full")) mds_mitigation = MDS_MITIGATION_FULL; + else if (!strcmp(str, "full,nosmt")) { + mds_mitigation = MDS_MITIGATION_FULL; + mds_nosmt = true; + } return 0; } -- cgit From 7c3658b20194a5b3209a143f63bc9c643c6a3ae2 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 2 Apr 2019 10:00:14 -0500 Subject: x86/speculation: Move arch_smt_update() call to after mitigation decisions arch_smt_update() now has a dependency on both Spectre v2 and MDS mitigations. Move its initial call to after all the mitigation decisions have been made. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Reviewed-by: Tyler Hicks Acked-by: Jiri Kosina --- arch/x86/kernel/cpu/bugs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 9f252082a83b..3f934ffef8cf 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -111,6 +111,8 @@ void __init check_bugs(void) mds_select_mitigation(); + arch_smt_update(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -638,9 +640,6 @@ specv2_set_mode: /* Set up IBPB and STIBP depending on the general spectre V2 command */ spectre_v2_user_select_mitigation(cmd); - - /* Enable STIBP if appropriate */ - arch_smt_update(); } static void update_stibp_msr(void * __unused) -- cgit From 39226ef02bfb43248b7db12a4fdccb39d95318e3 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 2 Apr 2019 10:00:51 -0500 Subject: x86/speculation/mds: Add SMT warning message MDS is vulnerable with SMT. Make that clear with a one-time printk whenever SMT first gets enabled. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Reviewed-by: Tyler Hicks Acked-by: Jiri Kosina --- arch/x86/kernel/cpu/bugs.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3f934ffef8cf..22a14d4b68a2 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -673,6 +673,9 @@ static void update_indir_branch_cond(void) static_branch_disable(&switch_to_cond_stibp); } +#undef pr_fmt +#define pr_fmt(fmt) fmt + /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { @@ -693,6 +696,8 @@ static void update_mds_branch_idle(void) static_branch_disable(&mds_idle_clear); } +#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" + void arch_smt_update(void) { /* Enhanced IBRS implies STIBP. No update required. */ @@ -717,6 +722,8 @@ void arch_smt_update(void) switch (mds_mitigation) { case MDS_MITIGATION_FULL: case MDS_MITIGATION_VMWERV: + if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) + pr_warn_once(MDS_MSG_SMT); update_mds_branch_idle(); break; case MDS_MITIGATION_OFF: @@ -1149,6 +1156,7 @@ static int __init l1tf_cmdline(char *str) early_param("l1tf", l1tf_cmdline); #undef pr_fmt +#define pr_fmt(fmt) fmt #ifdef CONFIG_SYSFS -- cgit From 89833fab15d6017ba006a45b5af68caa067171a7 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 29 Mar 2019 22:46:51 +0100 Subject: x86/fpu: Fix __user annotations In save_xstate_epilog(), use __user when type-casting userspace pointers. In setup_sigcontext() and x32_setup_rt_frame(), cast the userspace pointers to 'unsigned long __user *' before writing into them. These pointers are originally '__u32 __user *' or '__u64 __user *', causing sparse to complain when a userspace pointer is written into them. The casts are okay because the pointers always point to pointer-sized values. Thanks to Luc Van Oostenryck and Al Viro for explaining this to me. Signed-off-by: Jann Horn Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Mathieu Desnoyers Cc: Mukesh Ojha Cc: Qiaowei Ren Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: Will Deacon Cc: x86-ml Link: https://lkml.kernel.org/r/20190329214652.258477-3-jannh@google.com --- arch/x86/kernel/fpu/signal.c | 6 +++--- arch/x86/kernel/signal.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index f6a1d299627c..55b80de13ea5 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -92,13 +92,13 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) return err; err |= __put_user(FP_XSTATE_MAGIC2, - (__u32 *)(buf + fpu_user_xstate_size)); + (__u32 __user *)(buf + fpu_user_xstate_size)); /* * Read the xfeatures which we copied (directly from the cpu or * from the state in task struct) to the user buffers. */ - err |= __get_user(xfeatures, (__u32 *)&x->header.xfeatures); + err |= __get_user(xfeatures, (__u32 __user *)&x->header.xfeatures); /* * For legacy compatible, we always set FP/SSE bits in the bit @@ -113,7 +113,7 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) */ xfeatures |= XFEATURE_MASK_FPSSE; - err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures); + err |= __put_user(xfeatures, (__u32 __user *)&x->header.xfeatures); return err; } diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 08dfd4c1a4f9..b419e1a1a0ce 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -206,7 +206,7 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, put_user_ex(regs->ss, &sc->ss); #endif /* CONFIG_X86_32 */ - put_user_ex(fpstate, &sc->fpstate); + put_user_ex(fpstate, (unsigned long __user *)&sc->fpstate); /* non-iBCS2 extensions.. */ put_user_ex(mask, &sc->oldmask); @@ -569,7 +569,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, restorer = NULL; err |= -EFAULT; } - put_user_ex(restorer, &frame->pretcode); + put_user_ex(restorer, (unsigned long __user *)&frame->pretcode); } put_user_catch(err); err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, -- cgit From 60574d1e05b094d222162260dd9cac49f4d0996a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 11 Mar 2019 14:55:57 -0600 Subject: acpi: Create subtable parsing infrastructure Parsing entries in an ACPI table had assumed a generic header structure. There is no standard ACPI header, though, so less common layouts with different field sizes required custom parsers to go through their subtable entry list. Create the infrastructure for adding different table types so parsing the entries array may be more reused for all ACPI system tables and the common code doesn't need to be duplicated. Reviewed-by: Rafael J. Wysocki Acked-by: Jonathan Cameron Tested-by: Jonathan Cameron Signed-off-by: Keith Busch Tested-by: Brice Goglin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/acpi/boot.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8dcbf6890714..9fc92e4539d8 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -197,7 +197,7 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) } static int __init -acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) +acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_local_x2apic *processor = NULL; #ifdef CONFIG_X86_X2APIC @@ -210,7 +210,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) if (BAD_MADT_ENTRY(processor, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); #ifdef CONFIG_X86_X2APIC apic_id = processor->local_apic_id; @@ -242,7 +242,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) } static int __init -acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_local_apic *processor = NULL; @@ -251,7 +251,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) if (BAD_MADT_ENTRY(processor, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); /* Ignore invalid ID */ if (processor->id == 0xff) @@ -272,7 +272,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) } static int __init -acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) +acpi_parse_sapic(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_local_sapic *processor = NULL; @@ -281,7 +281,7 @@ acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) if (BAD_MADT_ENTRY(processor, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */ processor->processor_id, /* ACPI ID */ @@ -291,7 +291,7 @@ acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) } static int __init -acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, +acpi_parse_lapic_addr_ovr(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_local_apic_override *lapic_addr_ovr = NULL; @@ -301,7 +301,7 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, if (BAD_MADT_ENTRY(lapic_addr_ovr, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); acpi_lapic_addr = lapic_addr_ovr->address; @@ -309,7 +309,7 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, } static int __init -acpi_parse_x2apic_nmi(struct acpi_subtable_header *header, +acpi_parse_x2apic_nmi(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_local_x2apic_nmi *x2apic_nmi = NULL; @@ -319,7 +319,7 @@ acpi_parse_x2apic_nmi(struct acpi_subtable_header *header, if (BAD_MADT_ENTRY(x2apic_nmi, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); if (x2apic_nmi->lint != 1) printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); @@ -328,7 +328,7 @@ acpi_parse_x2apic_nmi(struct acpi_subtable_header *header, } static int __init -acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_local_apic_nmi *lapic_nmi = NULL; @@ -337,7 +337,7 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e if (BAD_MADT_ENTRY(lapic_nmi, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); if (lapic_nmi->lint != 1) printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); @@ -449,7 +449,7 @@ static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity, } static int __init -acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_ioapic(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_io_apic *ioapic = NULL; struct ioapic_domain_cfg cfg = { @@ -462,7 +462,7 @@ acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) if (BAD_MADT_ENTRY(ioapic, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); /* Statically assign IRQ numbers for IOAPICs hosting legacy IRQs */ if (ioapic->global_irq_base < nr_legacy_irqs()) @@ -508,7 +508,7 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, } static int __init -acpi_parse_int_src_ovr(struct acpi_subtable_header * header, +acpi_parse_int_src_ovr(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_interrupt_override *intsrc = NULL; @@ -518,7 +518,7 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, if (BAD_MADT_ENTRY(intsrc, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { acpi_sci_ioapic_setup(intsrc->source_irq, @@ -550,7 +550,7 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, } static int __init -acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_nmi_src(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_nmi_source *nmi_src = NULL; @@ -559,7 +559,7 @@ acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end if (BAD_MADT_ENTRY(nmi_src, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); /* TBD: Support nimsrc entries? */ -- cgit From dec3d0b1071a0f3194e66a83d26ecf4aa8c5910e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 31 Mar 2019 13:04:13 -0700 Subject: crypto: x86/crct10dif-pcl - fix use via crypto_shash_digest() The ->digest() method of crct10dif-pclmul reads the current CRC value from the shash_desc context. But this value is uninitialized, causing crypto_shash_digest() to compute the wrong result. Fix it. Probably this wasn't noticed before because lib/crc-t10dif.c only uses crypto_shash_update(), not crypto_shash_digest(). Likewise, crypto_shash_digest() is not yet tested by the crypto self-tests because those only test the ahash API which only uses shash init/update/final. Fixes: 0b95a7f85718 ("crypto: crct10dif - Glue code to cast accelerated CRCT10DIF assembly as a crypto transform") Cc: # v3.11+ Cc: Tim Chen Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/crct10dif-pclmul_glue.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c index 64f17d2d8b67..3c81e15b0873 100644 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c @@ -71,15 +71,14 @@ static int chksum_final(struct shash_desc *desc, u8 *out) return 0; } -static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, - u8 *out) +static int __chksum_finup(__u16 crc, const u8 *data, unsigned int len, u8 *out) { if (len >= 16 && crypto_simd_usable()) { kernel_fpu_begin(); - *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len); + *(__u16 *)out = crc_t10dif_pcl(crc, data, len); kernel_fpu_end(); } else - *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); + *(__u16 *)out = crc_t10dif_generic(crc, data, len); return 0; } @@ -88,15 +87,13 @@ static int chksum_finup(struct shash_desc *desc, const u8 *data, { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - return __chksum_finup(&ctx->crc, data, len, out); + return __chksum_finup(ctx->crc, data, len, out); } static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out) { - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(&ctx->crc, data, length, out); + return __chksum_finup(0, data, length, out); } static struct shash_alg alg = { -- cgit From e43e2657fe77a37b13643e2469670ecdb0ba5e10 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Dec 2018 14:32:02 +0100 Subject: x86/dma: Remove the x86_dma_fallback_dev hack Now that we removed support for the NULL device argument in the DMA API, there is no need to cater for that in the x86 code. Signed-off-by: Christoph Hellwig --- arch/x86/include/asm/dma-mapping.h | 10 ---------- arch/x86/kernel/amd_gart_64.c | 6 ------ arch/x86/kernel/pci-dma.c | 20 -------------------- 3 files changed, 36 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index ce4d176b3d13..6b15a24930e0 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -13,14 +13,7 @@ #include #include -#ifdef CONFIG_ISA -# define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) -#else -# define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) -#endif - extern int iommu_merge; -extern struct device x86_dma_fallback_dev; extern int panic_on_overflow; extern const struct dma_map_ops *dma_ops; @@ -30,7 +23,4 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return dma_ops; } -bool arch_dma_alloc_attrs(struct device **dev); -#define arch_dma_alloc_attrs arch_dma_alloc_attrs - #endif diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 2c0aa34af69c..bf7f13ea3c64 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -233,9 +233,6 @@ static dma_addr_t gart_map_page(struct device *dev, struct page *page, unsigned long bus; phys_addr_t paddr = page_to_phys(page) + offset; - if (!dev) - dev = &x86_dma_fallback_dev; - if (!need_iommu(dev, paddr, size)) return paddr; @@ -392,9 +389,6 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, if (nents == 0) return 0; - if (!dev) - dev = &x86_dma_fallback_dev; - out = 0; start = 0; start_sg = sg; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index d460998ae828..dcd272dbd0a9 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -51,14 +51,6 @@ int iommu_pass_through __read_mostly; extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; -/* Dummy device used for NULL arguments (normally ISA). */ -struct device x86_dma_fallback_dev = { - .init_name = "fallback device", - .coherent_dma_mask = ISA_DMA_BIT_MASK, - .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, -}; -EXPORT_SYMBOL(x86_dma_fallback_dev); - void __init pci_iommu_alloc(void) { struct iommu_table_entry *p; @@ -77,18 +69,6 @@ void __init pci_iommu_alloc(void) } } -bool arch_dma_alloc_attrs(struct device **dev) -{ - if (!*dev) - *dev = &x86_dma_fallback_dev; - - if (!is_device_dma_capable(*dev)) - return false; - return true; - -} -EXPORT_SYMBOL(arch_dma_alloc_attrs); - /* * See for the iommu kernel * parameter documentation. -- cgit From d75f773c86a2b8b7278e2c33343b46a4024bc002 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 25 Mar 2019 21:32:28 +0200 Subject: treewide: Switch printk users from %pf and %pF to %ps and %pS, respectively %pF and %pf are functionally equivalent to %pS and %ps conversion specifiers. The former are deprecated, therefore switch the current users to use the preferred variant. The changes have been produced by the following command: git grep -l '%p[fF]' | grep -v '^\(tools\|Documentation\)/' | \ while read i; do perl -i -pe 's/%pf/%ps/g; s/%pF/%pS/g;' $i; done And verifying the result. Link: http://lkml.kernel.org/r/20190325193229.23390-1-sakari.ailus@linux.intel.com Cc: Andy Shevchenko Cc: linux-arm-kernel@lists.infradead.org Cc: sparclinux@vger.kernel.org Cc: linux-um@lists.infradead.org Cc: xen-devel@lists.xenproject.org Cc: linux-acpi@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: drbd-dev@lists.linbit.com Cc: linux-block@vger.kernel.org Cc: linux-mmc@vger.kernel.org Cc: linux-nvdimm@lists.01.org Cc: linux-pci@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: linux-btrfs@vger.kernel.org Cc: linux-f2fs-devel@lists.sourceforge.net Cc: linux-mm@kvack.org Cc: ceph-devel@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Sakari Ailus Acked-by: David Sterba (for btrfs) Acked-by: Mike Rapoport (for mm/memblock.c) Acked-by: Bjorn Helgaas (for drivers/pci) Acked-by: Rafael J. Wysocki Signed-off-by: Petr Mladek --- arch/x86/include/asm/trace/exceptions.h | 2 +- arch/x86/kernel/irq_64.c | 2 +- arch/x86/mm/extable.c | 4 ++-- arch/x86/xen/multicalls.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h index e0e6d7f21399..6b1e87194809 100644 --- a/arch/x86/include/asm/trace/exceptions.h +++ b/arch/x86/include/asm/trace/exceptions.h @@ -30,7 +30,7 @@ DECLARE_EVENT_CLASS(x86_exceptions, __entry->error_code = error_code; ), - TP_printk("address=%pf ip=%pf error_code=0x%lx", + TP_printk("address=%ps ip=%ps error_code=0x%lx", (void *)__entry->address, (void *)__entry->ip, __entry->error_code) ); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 0469cd078db1..4dff56658427 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -58,7 +58,7 @@ static inline void stack_overflow_check(struct pt_regs *regs) if (regs->sp >= estack_top && regs->sp <= estack_bottom) return; - WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n", + WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pS)\n", current->comm, curbase, regs->sp, irq_stack_top, irq_stack_bottom, estack_top, estack_bottom, (void *)regs->ip); diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 3c4568f8fb28..b0a2de8d2f9e 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -145,7 +145,7 @@ __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup unsigned long error_code, unsigned long fault_addr) { - if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n", + if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", (unsigned int)regs->cx, regs->ip, (void *)regs->ip)) show_stack_regs(regs); @@ -162,7 +162,7 @@ __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup unsigned long error_code, unsigned long fault_addr) { - if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n", + if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax, regs->ip, (void *)regs->ip)) show_stack_regs(regs); diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 0766a08bdf45..07054572297f 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -105,7 +105,7 @@ void xen_mc_flush(void) for (i = 0; i < b->mcidx; i++) { if (b->entries[i].result < 0) { #if MC_DEBUG - pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pF\n", + pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pS\n", i + 1, b->debug[i].op, b->debug[i].args[0], -- cgit From 39ea9baffda91df8bfee9b45610242a3191ea1ec Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:30 +0200 Subject: x86/fpu: Remove fpu->initialized usage in __fpu__restore_sig() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a preparation for the removal of the ->initialized member in the fpu struct. __fpu__restore_sig() is deactivating the FPU via fpu__drop() and then setting manually ->initialized followed by fpu__restore(). The result is that it is possible to manipulate fpu->state and the state of registers won't be saved/restored on a context switch which would overwrite fpu->state: fpu__drop(fpu): ... fpu->initialized = 0; preempt_enable(); <--- context switch Don't access the fpu->state while the content is read from user space and examined/sanitized. Use a temporary kmalloc() buffer for the preparation of the FPU registers and once the state is considered okay, load it. Should something go wrong, return with an error and without altering the original FPU registers. The removal of fpu__initialize() is a nop because fpu->initialized is already set for the user task. [ bp: Massage a bit. ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Acked-by: Borislav Petkov Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-2-bigeasy@linutronix.de --- arch/x86/include/asm/fpu/signal.h | 2 +- arch/x86/kernel/fpu/regset.c | 5 ++--- arch/x86/kernel/fpu/signal.c | 40 +++++++++++++++------------------------ 3 files changed, 18 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h index 44bbc39a57b3..7fb516b6893a 100644 --- a/arch/x86/include/asm/fpu/signal.h +++ b/arch/x86/include/asm/fpu/signal.h @@ -22,7 +22,7 @@ int ia32_setup_frame(int sig, struct ksignal *ksig, extern void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk); -extern void convert_to_fxsr(struct task_struct *tsk, +extern void convert_to_fxsr(struct fxregs_state *fxsave, const struct user_i387_ia32_struct *env); unsigned long diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index bc02f5144b95..5dbc099178a8 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -269,11 +269,10 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) memcpy(&to[i], &from[i], sizeof(to[0])); } -void convert_to_fxsr(struct task_struct *tsk, +void convert_to_fxsr(struct fxregs_state *fxsave, const struct user_i387_ia32_struct *env) { - struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave; struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; int i; @@ -350,7 +349,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1); if (!ret) - convert_to_fxsr(target, &env); + convert_to_fxsr(&target->thread.fpu.state.fxsave, &env); /* * update the header bit in the xsave header, indicating the diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 55b80de13ea5..7296a9bb78e7 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -207,11 +207,11 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) } static inline void -sanitize_restored_xstate(struct task_struct *tsk, +sanitize_restored_xstate(union fpregs_state *state, struct user_i387_ia32_struct *ia32_env, u64 xfeatures, int fx_only) { - struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; + struct xregs_state *xsave = &state->xsave; struct xstate_header *header = &xsave->header; if (use_xsave()) { @@ -238,7 +238,7 @@ sanitize_restored_xstate(struct task_struct *tsk, */ xsave->i387.mxcsr &= mxcsr_feature_mask; - convert_to_fxsr(tsk, ia32_env); + convert_to_fxsr(&state->fxsave, ia32_env); } } @@ -284,8 +284,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) if (!access_ok(buf, size)) return -EACCES; - fpu__initialize(fpu); - if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_set(current, NULL, 0, sizeof(struct user_i387_ia32_struct), @@ -315,40 +313,32 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) * header. Validate and sanitize the copied state. */ struct user_i387_ia32_struct env; + union fpregs_state *state; int err = 0; + void *tmp; - /* - * Drop the current fpu which clears fpu->initialized. This ensures - * that any context-switch during the copy of the new state, - * avoids the intermediate state from getting restored/saved. - * Thus avoiding the new restored state from getting corrupted. - * We will be ready to restore/save the state only after - * fpu->initialized is again set. - */ - fpu__drop(fpu); + tmp = kzalloc(sizeof(*state) + fpu_kernel_xstate_size + 64, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + state = PTR_ALIGN(tmp, 64); if (using_compacted_format()) { - err = copy_user_to_xstate(&fpu->state.xsave, buf_fx); + err = copy_user_to_xstate(&state->xsave, buf_fx); } else { - err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); + err = __copy_from_user(&state->xsave, buf_fx, state_size); if (!err && state_size > offsetof(struct xregs_state, header)) - err = validate_xstate_header(&fpu->state.xsave.header); + err = validate_xstate_header(&state->xsave.header); } if (err || __copy_from_user(&env, buf, sizeof(env))) { - fpstate_init(&fpu->state); - trace_x86_fpu_init_state(fpu); err = -1; } else { - sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); + sanitize_restored_xstate(state, &env, xfeatures, fx_only); + copy_kernel_to_fpregs(state); } - local_bh_disable(); - fpu->initialized = 1; - fpu__restore(fpu); - local_bh_enable(); - + kfree(tmp); return err; } else { /* -- cgit From 6dd677a044e606fd343e31c2108b13d74aec1ca5 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:31 +0200 Subject: x86/fpu: Remove fpu__restore() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no users of fpu__restore() so it is time to remove it. The comment regarding fpu__restore() and TS bit is stale since commit b3b0870ef3ffe ("i387: do not preload FPU state at task switch time") and has no meaning since. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Aubrey Li Cc: Babu Moger Cc: "Chang S. Bae" Cc: Dmitry Safonov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: Joerg Roedel Cc: Jonathan Corbet Cc: kvm ML Cc: linux-doc@vger.kernel.org Cc: Nicolai Stange Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-3-bigeasy@linutronix.de --- arch/x86/include/asm/fpu/internal.h | 1 - arch/x86/kernel/fpu/core.c | 24 ------------------------ arch/x86/kernel/process_32.c | 4 +--- arch/x86/kernel/process_64.c | 4 +--- 4 files changed, 2 insertions(+), 31 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index fb04a3ded7dd..75a1d5f712ee 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -28,7 +28,6 @@ extern void fpu__initialize(struct fpu *fpu); extern void fpu__prepare_read(struct fpu *fpu); extern void fpu__prepare_write(struct fpu *fpu); extern void fpu__save(struct fpu *fpu); -extern void fpu__restore(struct fpu *fpu); extern int fpu__restore_sig(void __user *buf, int ia32_frame); extern void fpu__drop(struct fpu *fpu); extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 2e5003fef51a..1d3ae7988f7f 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -303,30 +303,6 @@ void fpu__prepare_write(struct fpu *fpu) } } -/* - * 'fpu__restore()' is called to copy FPU registers from - * the FPU fpstate to the live hw registers and to activate - * access to the hardware registers, so that FPU instructions - * can be used afterwards. - * - * Must be called with kernel preemption disabled (for example - * with local interrupts disabled, as it is in the case of - * do_device_not_available()). - */ -void fpu__restore(struct fpu *fpu) -{ - fpu__initialize(fpu); - - /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ - kernel_fpu_disable(); - trace_x86_fpu_before_restore(fpu); - fpregs_activate(fpu); - copy_kernel_to_fpregs(&fpu->state); - trace_x86_fpu_after_restore(fpu); - kernel_fpu_enable(); -} -EXPORT_SYMBOL_GPL(fpu__restore); - /* * Drops current FPU state: deactivates the fpregs and * the fpstate. NOTE: it still leaves previous contents diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index e471d8e6f0b2..7888a41a03cd 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -267,9 +267,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so - * the GDT and LDT are properly updated, and must be - * done before fpu__restore(), so the TS bit is up - * to date. + * the GDT and LDT are properly updated. */ arch_end_context_switch(next_p); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6a62f4af9fcf..e1983b3a16c4 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -538,9 +538,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Leave lazy mode, flushing any hypercalls made here. This * must be done after loading TLS entries in the GDT but before - * loading segments that might reference them, and and it must - * be done before fpu__restore(), so the TS bit is up to - * date. + * loading segments that might reference them. */ arch_end_context_switch(next_p); -- cgit From 60e528d6ce3f60a058bbb64f8acb2a07f84b172a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:32 +0200 Subject: x86/fpu: Remove preempt_disable() in fpu__clear() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The preempt_disable() section was introduced in commit a10b6a16cdad8 ("x86/fpu: Make the fpu state change in fpu__clear() scheduler-atomic") and it was said to be temporary. fpu__initialize() initializes the FPU struct to its initial value and then sets ->initialized to 1. The last part is the important one. The content of the state does not matter because it gets set via copy_init_fpstate_to_fpregs(). A preemption here has little meaning because the registers will always be set to the same content after copy_init_fpstate_to_fpregs(). A softirq with a kernel_fpu_begin() could also force to save FPU's registers after fpu__initialize() without changing the outcome here. Remove the preempt_disable() section in fpu__clear(), preemption here does not hurt. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Nicolai Stange Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-4-bigeasy@linutronix.de --- arch/x86/kernel/fpu/core.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 1d3ae7988f7f..1940319268ae 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -366,11 +366,9 @@ void fpu__clear(struct fpu *fpu) * Make sure fpstate is cleared and initialized. */ if (static_cpu_has(X86_FEATURE_FPU)) { - preempt_disable(); fpu__initialize(fpu); user_fpu_begin(); copy_init_fpstate_to_fpregs(); - preempt_enable(); } } -- cgit From 88f5260a3bf9bfb276b5b4aac2e81587e425a1d7 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:33 +0200 Subject: x86/fpu: Always init the state in fpu__clear() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fpu__clear() only initializes the state if the CPU has FPU support. This initialisation is also required for FPU-less systems and takes place in math_emulate(). Since fpu__initialize() only performs the initialization if ->initialized is zero it does not matter that it is invoked each time an opcode is emulated. It makes the removal of ->initialized easier if the struct is also initialized in the FPU-less case at the same time. Move fpu__initialize() before the FPU feature check so it is also performed in the FPU-less case too. [ bp: Massage a bit. ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Aubrey Li Cc: Bill Metzenthen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Nicolai Stange Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-5-bigeasy@linutronix.de --- arch/x86/include/asm/fpu/internal.h | 1 - arch/x86/kernel/fpu/core.c | 5 ++--- arch/x86/math-emu/fpu_entry.c | 3 --- 3 files changed, 2 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 75a1d5f712ee..70ecb7c032cb 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -24,7 +24,6 @@ /* * High level FPU state handling functions: */ -extern void fpu__initialize(struct fpu *fpu); extern void fpu__prepare_read(struct fpu *fpu); extern void fpu__prepare_write(struct fpu *fpu); extern void fpu__save(struct fpu *fpu); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 1940319268ae..e43296854e37 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -223,7 +223,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) * Activate the current task's in-memory FPU context, * if it has not been used before: */ -void fpu__initialize(struct fpu *fpu) +static void fpu__initialize(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); @@ -236,7 +236,6 @@ void fpu__initialize(struct fpu *fpu) fpu->initialized = 1; } } -EXPORT_SYMBOL_GPL(fpu__initialize); /* * This function must be called before we read a task's fpstate. @@ -365,8 +364,8 @@ void fpu__clear(struct fpu *fpu) /* * Make sure fpstate is cleared and initialized. */ + fpu__initialize(fpu); if (static_cpu_has(X86_FEATURE_FPU)) { - fpu__initialize(fpu); user_fpu_begin(); copy_init_fpstate_to_fpregs(); } diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 9e2ba7e667f6..a873da6b46d6 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -113,9 +113,6 @@ void math_emulate(struct math_emu_info *info) unsigned long code_base = 0; unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ struct desc_struct code_descriptor; - struct fpu *fpu = ¤t->thread.fpu; - - fpu__initialize(fpu); #ifdef RE_ENTRANT_CHECKING if (emulating) { -- cgit From fbcc9e0c37ba3186c41b5eb1aee2f7f3b711bc1e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:34 +0200 Subject: x86/fpu: Remove fpu->initialized usage in copy_fpstate_to_sigframe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With lazy-FPU support the (now named variable) ->initialized was set to true if the CPU's FPU registers were holding a valid state of the FPU registers for the active process. If it was set to false then the FPU state was saved in fpu->state and the FPU was deactivated. With lazy-FPU gone, ->initialized is always true for user threads and kernel threads never call this function so ->initialized is always true in copy_fpstate_to_sigframe(). The using_compacted_format() check is also a leftover from the lazy-FPU time. In the ->initialized == false case copy_to_user() would copy the compacted buffer while userland would expect the non-compacted format instead. So in order to save the FPU state in the non-compacted form it issues XSAVE to save the *current* FPU state. If the FPU is not enabled, the attempt raises the FPU trap, the trap restores the FPU contents and re-enables the FPU and XSAVE is invoked again and succeeds. *This* does not longer work since commit bef8b6da9522 ("x86/fpu: Handle #NM without FPU emulation as an error") Remove the check for ->initialized because it is always true and remove the false condition. Update the comment to reflect that the state is always live. [ bp: Massage. ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-6-bigeasy@linutronix.de --- arch/x86/kernel/fpu/signal.c | 35 ++++++++--------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 7296a9bb78e7..c1a5999affa0 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -144,9 +144,8 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) * buf == buf_fx for 64-bit frames and 32-bit fsave frame. * buf != buf_fx for 32-bit frames with fxstate. * - * If the fpu, extended register state is live, save the state directly - * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise, - * copy the thread's fpu state to the user frame starting at 'buf_fx'. + * Save the state directly to the user frame pointed by the aligned pointer + * 'buf_fx'. * * If this is a 32-bit frame with fxstate, put a fsave header before * the aligned state at 'buf_fx'. @@ -157,7 +156,6 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) { struct fpu *fpu = ¤t->thread.fpu; - struct xregs_state *xsave = &fpu->state.xsave; struct task_struct *tsk = current; int ia32_fxstate = (buf != buf_fx); @@ -172,29 +170,12 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) sizeof(struct user_i387_ia32_struct), NULL, (struct _fpstate_32 __user *) buf) ? -1 : 1; - if (fpu->initialized || using_compacted_format()) { - /* Save the live register state to the user directly. */ - if (copy_fpregs_to_sigframe(buf_fx)) - return -1; - /* Update the thread's fxstate to save the fsave header. */ - if (ia32_fxstate) - copy_fxregs_to_kernel(fpu); - } else { - /* - * It is a *bug* if kernel uses compacted-format for xsave - * area and we copy it out directly to a signal frame. It - * should have been handled above by saving the registers - * directly. - */ - if (boot_cpu_has(X86_FEATURE_XSAVES)) { - WARN_ONCE(1, "x86/fpu: saving compacted-format xsave area to a signal frame!\n"); - return -1; - } - - fpstate_sanitize_xstate(fpu); - if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size)) - return -1; - } + /* Save the live registers state to the user frame directly. */ + if (copy_fpregs_to_sigframe(buf_fx)) + return -1; + /* Update the thread's fxstate to save the fsave header. */ + if (ia32_fxstate) + copy_fxregs_to_kernel(fpu); /* Save the fsave header for the 32-bit frames. */ if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) -- cgit From 39388e80f9b0c3788bfb6efe3054bdce0c3ead45 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:35 +0200 Subject: x86/fpu: Don't save fxregs for ia32 frames in copy_fpstate_to_sigframe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 72a671ced66db ("x86, fpu: Unify signal handling code paths for x86 and x86_64 kernels") the 32bit and 64bit path of the signal delivery code were merged. The 32bit version: int save_i387_xstate_ia32(void __user *buf) … if (cpu_has_xsave) return save_i387_xsave(fp); if (cpu_has_fxsr) return save_i387_fxsave(fp); The 64bit version: int save_i387_xstate(void __user *buf) … if (user_has_fpu()) { if (use_xsave()) err = xsave_user(buf); else err = fxsave_user(buf); if (unlikely(err)) { __clear_user(buf, xstate_size); return err; The merge: int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) … if (user_has_fpu()) { /* Save the live register state to the user directly. */ if (save_user_xstate(buf_fx)) return -1; /* Update the thread's fxstate to save the fsave header. */ if (ia32_fxstate) fpu_fxsave(&tsk->thread.fpu); I don't think that we needed to save the FPU registers to ->thread.fpu because the registers were stored in buf_fx. Today the state will be restored from buf_fx after the signal was handled (I assume that this was also the case with lazy-FPU). Since commit 66463db4fc560 ("x86, fpu: shift drop_init_fpu() from save_xstate_sig() to handle_signal()") it is ensured that the signal handler starts with clear/fresh set of FPU registers which means that the previous store is futile. Remove the copy_fxregs_to_kernel() call because task's FPU state is cleared later in handle_signal() via fpu__clear(). Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-7-bigeasy@linutronix.de --- arch/x86/kernel/fpu/signal.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index c1a5999affa0..34989d2a8893 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -155,7 +155,6 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) */ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) { - struct fpu *fpu = ¤t->thread.fpu; struct task_struct *tsk = current; int ia32_fxstate = (buf != buf_fx); @@ -173,9 +172,6 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) /* Save the live registers state to the user frame directly. */ if (copy_fpregs_to_sigframe(buf_fx)) return -1; - /* Update the thread's fxstate to save the fsave header. */ - if (ia32_fxstate) - copy_fxregs_to_kernel(fpu); /* Save the fsave header for the 32-bit frames. */ if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) -- cgit From 2722146eb78451b30e4717a267a3a2b44e4ad317 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:36 +0200 Subject: x86/fpu: Remove fpu->initialized MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The struct fpu.initialized member is always set to one for user tasks and zero for kernel tasks. This avoids saving/restoring the FPU registers for kernel threads. The ->initialized = 0 case for user tasks has been removed in previous changes, for instance, by doing an explicit unconditional init at fork() time for FPU-less systems which was otherwise delayed until the emulated opcode. The context switch code (switch_fpu_prepare() + switch_fpu_finish()) can't unconditionally save/restore registers for kernel threads. Not only would it slow down the switch but also load a zeroed xcomp_bv for XSAVES. For kernel_fpu_begin() (+end) the situation is similar: EFI with runtime services uses this before alternatives_patched is true. Which means that this function is used too early and it wasn't the case before. For those two cases, use current->mm to distinguish between user and kernel thread. For kernel_fpu_begin() skip save/restore of the FPU registers. During the context switch into a kernel thread don't do anything. There is no reason to save the FPU state of a kernel thread. The reordering in __switch_to() is important because the current() pointer needs to be valid before switch_fpu_finish() is invoked so ->mm is seen of the new task instead the old one. N.B.: fpu__save() doesn't need to check ->mm because it is called by user tasks only. [ bp: Massage. ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Aubrey Li Cc: Babu Moger Cc: "Chang S. Bae" Cc: Dmitry Safonov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: Joerg Roedel Cc: kvm ML Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Nicolai Stange Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Will Deacon Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-8-bigeasy@linutronix.de --- arch/x86/ia32/ia32_signal.c | 17 ++++----- arch/x86/include/asm/fpu/internal.h | 18 +++++----- arch/x86/include/asm/fpu/types.h | 9 ----- arch/x86/include/asm/trace/fpu.h | 5 +-- arch/x86/kernel/fpu/core.c | 70 +++++++++++-------------------------- arch/x86/kernel/fpu/init.c | 2 -- arch/x86/kernel/fpu/regset.c | 19 +++------- arch/x86/kernel/fpu/xstate.c | 2 -- arch/x86/kernel/process_32.c | 4 +-- arch/x86/kernel/process_64.c | 4 +-- arch/x86/kernel/signal.c | 17 ++++----- arch/x86/mm/pkeys.c | 7 +--- 12 files changed, 53 insertions(+), 121 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 321fe5f5d0e9..6eeb3249f22f 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -216,8 +216,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size, void __user **fpstate) { - struct fpu *fpu = ¤t->thread.fpu; - unsigned long sp; + unsigned long sp, fx_aligned, math_size; /* Default to using normal stack */ sp = regs->sp; @@ -231,15 +230,11 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, ksig->ka.sa.sa_restorer) sp = (unsigned long) ksig->ka.sa.sa_restorer; - if (fpu->initialized) { - unsigned long fx_aligned, math_size; - - sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size); - *fpstate = (struct _fpstate_32 __user *) sp; - if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned, - math_size) < 0) - return (void __user *) -1L; - } + sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size); + *fpstate = (struct _fpstate_32 __user *) sp; + if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned, + math_size) < 0) + return (void __user *) -1L; sp -= frame_size; /* Align the stack pointer according to the i386 ABI, diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 70ecb7c032cb..04042eacc852 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -494,11 +494,14 @@ static inline void fpregs_activate(struct fpu *fpu) * * - switch_fpu_finish() restores the new state as * necessary. + * + * The FPU context is only stored/restored for a user task and + * ->mm is used to distinguish between kernel and user threads. */ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) { + if (static_cpu_has(X86_FEATURE_FPU) && current->mm) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else @@ -506,8 +509,7 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu) /* But leave fpu_fpregs_owner_ctx! */ trace_x86_fpu_regs_deactivated(old_fpu); - } else - old_fpu->last_cpu = -1; + } } /* @@ -520,12 +522,12 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu) */ static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu) { - bool preload = static_cpu_has(X86_FEATURE_FPU) && - new_fpu->initialized; + if (static_cpu_has(X86_FEATURE_FPU)) { + if (!fpregs_state_valid(new_fpu, cpu)) { + if (current->mm) + copy_kernel_to_fpregs(&new_fpu->state); + } - if (preload) { - if (!fpregs_state_valid(new_fpu, cpu)) - copy_kernel_to_fpregs(&new_fpu->state); fpregs_activate(new_fpu); } } diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 2e32e178e064..f098f6cab94b 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -293,15 +293,6 @@ struct fpu { */ unsigned int last_cpu; - /* - * @initialized: - * - * This flag indicates whether this context is initialized: if the task - * is not running then we can restore from this context, if the task - * is running then we should save into this context. - */ - unsigned char initialized; - /* * @avx512_timestamp: * diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h index 069c04be1507..bd65f6ba950f 100644 --- a/arch/x86/include/asm/trace/fpu.h +++ b/arch/x86/include/asm/trace/fpu.h @@ -13,22 +13,19 @@ DECLARE_EVENT_CLASS(x86_fpu, TP_STRUCT__entry( __field(struct fpu *, fpu) - __field(bool, initialized) __field(u64, xfeatures) __field(u64, xcomp_bv) ), TP_fast_assign( __entry->fpu = fpu; - __entry->initialized = fpu->initialized; if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { __entry->xfeatures = fpu->state.xsave.header.xfeatures; __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; } ), - TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx", + TP_printk("x86/fpu: %p xfeatures: %llx xcomp_bv: %llx", __entry->fpu, - __entry->initialized, __entry->xfeatures, __entry->xcomp_bv ) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index e43296854e37..97e27de2b7c0 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -101,7 +101,7 @@ static void __kernel_fpu_begin(void) kernel_fpu_disable(); - if (fpu->initialized) { + if (current->mm) { /* * Ignore return value -- we don't care if reg state * is clobbered. @@ -116,7 +116,7 @@ static void __kernel_fpu_end(void) { struct fpu *fpu = ¤t->thread.fpu; - if (fpu->initialized) + if (current->mm) copy_kernel_to_fpregs(&fpu->state); kernel_fpu_enable(); @@ -147,11 +147,10 @@ void fpu__save(struct fpu *fpu) preempt_disable(); trace_x86_fpu_before_save(fpu); - if (fpu->initialized) { - if (!copy_fpregs_to_fpstate(fpu)) { - copy_kernel_to_fpregs(&fpu->state); - } - } + + if (!copy_fpregs_to_fpstate(fpu)) + copy_kernel_to_fpregs(&fpu->state); + trace_x86_fpu_after_save(fpu); preempt_enable(); } @@ -190,7 +189,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) { dst_fpu->last_cpu = -1; - if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU)) + if (!static_cpu_has(X86_FEATURE_FPU)) return 0; WARN_ON_FPU(src_fpu != ¤t->thread.fpu); @@ -227,14 +226,10 @@ static void fpu__initialize(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); - if (!fpu->initialized) { - fpstate_init(&fpu->state); - trace_x86_fpu_init_state(fpu); + fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); - trace_x86_fpu_activate_state(fpu); - /* Safe to do for the current task: */ - fpu->initialized = 1; - } + trace_x86_fpu_activate_state(fpu); } /* @@ -247,32 +242,20 @@ static void fpu__initialize(struct fpu *fpu) * * - or it's called for stopped tasks (ptrace), in which case the * registers were already saved by the context-switch code when - * the task scheduled out - we only have to initialize the registers - * if they've never been initialized. + * the task scheduled out. * * If the task has used the FPU before then save it. */ void fpu__prepare_read(struct fpu *fpu) { - if (fpu == ¤t->thread.fpu) { + if (fpu == ¤t->thread.fpu) fpu__save(fpu); - } else { - if (!fpu->initialized) { - fpstate_init(&fpu->state); - trace_x86_fpu_init_state(fpu); - - trace_x86_fpu_activate_state(fpu); - /* Safe to do for current and for stopped child tasks: */ - fpu->initialized = 1; - } - } } /* * This function must be called before we write a task's fpstate. * - * If the task has used the FPU before then invalidate any cached FPU registers. - * If the task has not used the FPU before then initialize its fpstate. + * Invalidate any cached FPU registers. * * After this function call, after registers in the fpstate are * modified and the child task has woken up, the child task will @@ -289,17 +272,8 @@ void fpu__prepare_write(struct fpu *fpu) */ WARN_ON_FPU(fpu == ¤t->thread.fpu); - if (fpu->initialized) { - /* Invalidate any cached state: */ - __fpu_invalidate_fpregs_state(fpu); - } else { - fpstate_init(&fpu->state); - trace_x86_fpu_init_state(fpu); - - trace_x86_fpu_activate_state(fpu); - /* Safe to do for stopped child tasks: */ - fpu->initialized = 1; - } + /* Invalidate any cached state: */ + __fpu_invalidate_fpregs_state(fpu); } /* @@ -316,17 +290,13 @@ void fpu__drop(struct fpu *fpu) preempt_disable(); if (fpu == ¤t->thread.fpu) { - if (fpu->initialized) { - /* Ignore delayed exceptions from user space */ - asm volatile("1: fwait\n" - "2:\n" - _ASM_EXTABLE(1b, 2b)); - fpregs_deactivate(fpu); - } + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" + "2:\n" + _ASM_EXTABLE(1b, 2b)); + fpregs_deactivate(fpu); } - fpu->initialized = 0; - trace_x86_fpu_dropped(fpu); preempt_enable(); diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 6abd83572b01..20d8fa7124c7 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -239,8 +239,6 @@ static void __init fpu__init_system_ctx_switch(void) WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; - - WARN_ON_FPU(current->thread.fpu.initialized); } /* diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 5dbc099178a8..d652b939ccfb 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -15,16 +15,12 @@ */ int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset) { - struct fpu *target_fpu = &target->thread.fpu; - - return target_fpu->initialized ? regset->n : 0; + return regset->n; } int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset) { - struct fpu *target_fpu = &target->thread.fpu; - - if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized) + if (boot_cpu_has(X86_FEATURE_FXSR)) return regset->n; else return 0; @@ -370,16 +366,9 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu) { struct task_struct *tsk = current; - struct fpu *fpu = &tsk->thread.fpu; - int fpvalid; - - fpvalid = fpu->initialized; - if (fpvalid) - fpvalid = !fpregs_get(tsk, NULL, - 0, sizeof(struct user_i387_ia32_struct), - ufpu, NULL); - return fpvalid; + return !fpregs_get(tsk, NULL, 0, sizeof(struct user_i387_ia32_struct), + ufpu, NULL); } EXPORT_SYMBOL(dump_fpu); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index d7432c2b1051..8bfcc5b9e04b 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -892,8 +892,6 @@ const void *get_xsave_field_ptr(int xsave_state) { struct fpu *fpu = ¤t->thread.fpu; - if (!fpu->initialized) - return NULL; /* * fpu__save() takes the CPU's xstate registers * and saves them off to the 'fpu memory buffer. diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 7888a41a03cd..77d9eb43ccac 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -288,10 +288,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (prev->gs | next->gs) lazy_load_gs(next->gs); - switch_fpu_finish(next_fpu, cpu); - this_cpu_write(current_task, next_p); + switch_fpu_finish(next_fpu, cpu); + /* Load the Intel cache allocation PQR MSR. */ resctrl_sched_in(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e1983b3a16c4..ffea7c557963 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -566,14 +566,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) x86_fsgsbase_load(prev, next); - switch_fpu_finish(next_fpu, cpu); - /* * Switch the PDA and FPU contexts. */ this_cpu_write(current_task, next_p); this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); + switch_fpu_finish(next_fpu, cpu); + /* Reload sp0. */ update_task_stack(next_p); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b419e1a1a0ce..87b327c6cb10 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -246,7 +246,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, unsigned long sp = regs->sp; unsigned long buf_fx = 0; int onsigstack = on_sig_stack(sp); - struct fpu *fpu = ¤t->thread.fpu; + int ret; /* redzone */ if (IS_ENABLED(CONFIG_X86_64)) @@ -265,11 +265,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, sp = (unsigned long) ka->sa.sa_restorer; } - if (fpu->initialized) { - sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32), - &buf_fx, &math_size); - *fpstate = (void __user *)sp; - } + sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32), + &buf_fx, &math_size); + *fpstate = (void __user *)sp; sp = align_sigframe(sp - frame_size); @@ -281,8 +279,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, return (void __user *)-1L; /* save i387 and extended state */ - if (fpu->initialized && - copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0) + ret = copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size); + if (ret < 0) return (void __user *)-1L; return (void __user *)sp; @@ -763,8 +761,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) /* * Ensure the signal handler starts with the new fpu state. */ - if (fpu->initialized) - fpu__clear(fpu); + fpu__clear(fpu); } signal_setup_done(failed, ksig, stepping); } diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index 047a77f6a10c..05bb9a44eb1c 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -39,17 +39,12 @@ int __execute_only_pkey(struct mm_struct *mm) * dance to set PKRU if we do not need to. Check it * first and assume that if the execute-only pkey is * write-disabled that we do not have to set it - * ourselves. We need preempt off so that nobody - * can make fpregs inactive. + * ourselves. */ - preempt_disable(); if (!need_to_set_mm_pkey && - current->thread.fpu.initialized && !__pkru_allows_read(read_pkru(), execute_only_pkey)) { - preempt_enable(); return execute_only_pkey; } - preempt_enable(); /* * Set up PKRU so that it denies access for everything -- cgit From 0169f53e0d97bb675075506810494bd86b8c934e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:37 +0200 Subject: x86/fpu: Remove user_fpu_begin() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit user_fpu_begin() sets fpu_fpregs_owner_ctx to task's fpu struct. This is always the case since there is no lazy FPU anymore. fpu_fpregs_owner_ctx is used during context switch to decide if it needs to load the saved registers or if the currently loaded registers are valid. It could be skipped during a taskA -> kernel thread -> taskA switch because the switch to the kernel thread would not alter the CPU's sFPU tate. Since this field is always updated during context switch and never invalidated, setting it manually (in user context) makes no difference. A kernel thread with kernel_fpu_begin() block could set fpu_fpregs_owner_ctx to NULL but a kernel thread does not use user_fpu_begin(). This is a leftover from the lazy-FPU time. Remove user_fpu_begin(), it does not change fpu_fpregs_owner_ctx's content. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Aubrey Li Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Nicolai Stange Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-9-bigeasy@linutronix.de --- arch/x86/include/asm/fpu/internal.h | 17 ----------------- arch/x86/kernel/fpu/core.c | 4 +--- arch/x86/kernel/fpu/signal.c | 1 - 3 files changed, 1 insertion(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 04042eacc852..54f70cae2f15 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -532,23 +532,6 @@ static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu) } } -/* - * Needs to be preemption-safe. - * - * NOTE! user_fpu_begin() must be used only immediately before restoring - * the save state. It does not do any saving/restoring on its own. In - * lazy FPU mode, it is just an optimization to avoid a #NM exception, - * the task can lose the FPU right after preempt_enable(). - */ -static inline void user_fpu_begin(void) -{ - struct fpu *fpu = ¤t->thread.fpu; - - preempt_disable(); - fpregs_activate(fpu); - preempt_enable(); -} - /* * MXCSR and XCR definitions: */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 97e27de2b7c0..739ca3ae2bdc 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -335,10 +335,8 @@ void fpu__clear(struct fpu *fpu) * Make sure fpstate is cleared and initialized. */ fpu__initialize(fpu); - if (static_cpu_has(X86_FEATURE_FPU)) { - user_fpu_begin(); + if (static_cpu_has(X86_FEATURE_FPU)) copy_init_fpstate_to_fpregs(); - } } /* diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 34989d2a8893..155f4552413e 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -322,7 +322,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) * For 64-bit frames and 32-bit fsave frames, restore the user * state to the registers directly (with exceptions handled). */ - user_fpu_begin(); if (copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only)) { fpu__clear(fpu); return -1; -- cgit From 4ee91519e1dccc175665fe24bb20a47c6053575c Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 3 Apr 2019 18:41:38 +0200 Subject: x86/fpu: Add an __fpregs_load_activate() internal helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a helper function that ensures the floating point registers for the current task are active. Use with preemption disabled. While at it, add fpregs_lock/unlock() helpers too, to be used in later patches. [ bp: Add a comment about its intended usage. ] Signed-off-by: Rik van Riel Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Aubrey Li Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-10-bigeasy@linutronix.de --- arch/x86/include/asm/fpu/api.h | 11 +++++++++++ arch/x86/include/asm/fpu/internal.h | 22 ++++++++++++++-------- 2 files changed, 25 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index b56d504af654..73e684160f35 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -10,6 +10,7 @@ #ifndef _ASM_X86_FPU_API_H #define _ASM_X86_FPU_API_H +#include /* * Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It @@ -22,6 +23,16 @@ extern void kernel_fpu_begin(void); extern void kernel_fpu_end(void); extern bool irq_fpu_usable(void); +static inline void fpregs_lock(void) +{ + preempt_disable(); +} + +static inline void fpregs_unlock(void) +{ + preempt_enable(); +} + /* * Query the presence of one or more xfeatures. Works on any legacy CPU as well. * diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 54f70cae2f15..3e0c2c496f2d 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -484,6 +484,18 @@ static inline void fpregs_activate(struct fpu *fpu) trace_x86_fpu_regs_activated(fpu); } +/* + * Internal helper, do not use directly. Use switch_fpu_return() instead. + */ +static inline void __fpregs_load_activate(struct fpu *fpu, int cpu) +{ + if (!fpregs_state_valid(fpu, cpu)) { + if (current->mm) + copy_kernel_to_fpregs(&fpu->state); + fpregs_activate(fpu); + } +} + /* * FPU state switching for scheduling. * @@ -522,14 +534,8 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu) */ static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu) { - if (static_cpu_has(X86_FEATURE_FPU)) { - if (!fpregs_state_valid(new_fpu, cpu)) { - if (current->mm) - copy_kernel_to_fpregs(&new_fpu->state); - } - - fpregs_activate(new_fpu); - } + if (static_cpu_has(X86_FEATURE_FPU)) + __fpregs_load_activate(new_fpu, cpu); } /* -- cgit From 07baeb04f37c952981d63359ff840118ce8f5434 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:39 +0200 Subject: x86/fpu: Make __raw_xsave_addr() use a feature number instead of mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most users of __raw_xsave_addr() use a feature number, shift it to a mask and then __raw_xsave_addr() shifts it back to the feature number. Make __raw_xsave_addr() use the feature number as an argument. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-11-bigeasy@linutronix.de --- arch/x86/kernel/fpu/xstate.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 8bfcc5b9e04b..4f7f3c5d0d0c 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -805,20 +805,18 @@ void fpu__resume_cpu(void) } /* - * Given an xstate feature mask, calculate where in the xsave + * Given an xstate feature nr, calculate where in the xsave * buffer the state is. Callers should ensure that the buffer * is valid. */ -static void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask) +static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr) { - int feature_nr = fls64(xstate_feature_mask) - 1; - - if (!xfeature_enabled(feature_nr)) { + if (!xfeature_enabled(xfeature_nr)) { WARN_ON_FPU(1); return NULL; } - return (void *)xsave + xstate_comp_offsets[feature_nr]; + return (void *)xsave + xstate_comp_offsets[xfeature_nr]; } /* * Given the xsave area and a state inside, this function returns the @@ -840,6 +838,7 @@ static void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask */ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) { + int xfeature_nr; /* * Do we even *have* xsave state? */ @@ -867,7 +866,8 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) if (!(xsave->header.xfeatures & xstate_feature)) return NULL; - return __raw_xsave_addr(xsave, xstate_feature); + xfeature_nr = fls64(xstate_feature) - 1; + return __raw_xsave_addr(xsave, xfeature_nr); } EXPORT_SYMBOL_GPL(get_xsave_addr); @@ -1014,7 +1014,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of * Copy only in-use xstates: */ if ((header.xfeatures >> i) & 1) { - void *src = __raw_xsave_addr(xsave, 1 << i); + void *src = __raw_xsave_addr(xsave, i); offset = xstate_offsets[i]; size = xstate_sizes[i]; @@ -1100,7 +1100,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i * Copy only in-use xstates: */ if ((header.xfeatures >> i) & 1) { - void *src = __raw_xsave_addr(xsave, 1 << i); + void *src = __raw_xsave_addr(xsave, i); offset = xstate_offsets[i]; size = xstate_sizes[i]; @@ -1157,7 +1157,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) u64 mask = ((u64)1 << i); if (hdr.xfeatures & mask) { - void *dst = __raw_xsave_addr(xsave, 1 << i); + void *dst = __raw_xsave_addr(xsave, i); offset = xstate_offsets[i]; size = xstate_sizes[i]; @@ -1211,7 +1211,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) u64 mask = ((u64)1 << i); if (hdr.xfeatures & mask) { - void *dst = __raw_xsave_addr(xsave, 1 << i); + void *dst = __raw_xsave_addr(xsave, i); offset = xstate_offsets[i]; size = xstate_sizes[i]; -- cgit From abd16d68d65229e5acafdadc32704239131bf2ea Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:40 +0200 Subject: x86/fpu: Use a feature number instead of mask in two more helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After changing the argument of __raw_xsave_addr() from a mask to number Dave suggested to check if it makes sense to do the same for get_xsave_addr(). As it turns out it does. Only get_xsave_addr() needs the mask to check if the requested feature is part of what is supported/saved and then uses the number again. The shift operation is cheaper compared to fls64() (find last bit set). Also, the feature number uses less opcode space compared to the mask. :) Make the get_xsave_addr() argument a xfeature number instead of a mask and fix up its callers. Furthermore, use xfeature_nr and xfeature_mask consistently. This results in the following changes to the kvm code: feature -> xfeature_mask index -> xfeature_nr Suggested-by: Dave Hansen Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: "Eric W. Biederman" Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Masami Hiramatsu Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: "Radim Krčmář" Cc: Rasmus Villemoes Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Siarhei Liakh Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-12-bigeasy@linutronix.de --- arch/x86/include/asm/fpu/xstate.h | 4 ++-- arch/x86/kernel/fpu/xstate.c | 22 ++++++++++------------ arch/x86/kernel/traps.c | 2 +- arch/x86/kvm/x86.c | 28 ++++++++++++++-------------- arch/x86/mm/mpx.c | 6 +++--- 5 files changed, 30 insertions(+), 32 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 48581988d78c..fbe41f808e5d 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -46,8 +46,8 @@ extern void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask); void fpu__xstate_clear_all_cpu_caps(void); -void *get_xsave_addr(struct xregs_state *xsave, int xstate); -const void *get_xsave_field_ptr(int xstate_field); +void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); +const void *get_xsave_field_ptr(int xfeature_nr); int using_compacted_format(void); int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 4f7f3c5d0d0c..9c459fd1d38e 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -830,15 +830,14 @@ static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr) * * Inputs: * xstate: the thread's storage area for all FPU data - * xstate_feature: state which is defined in xsave.h (e.g. - * XFEATURE_MASK_FP, XFEATURE_MASK_SSE, etc...) + * xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP, + * XFEATURE_SSE, etc...) * Output: * address of the state in the xsave area, or NULL if the * field is not present in the xsave buffer. */ -void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) +void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) { - int xfeature_nr; /* * Do we even *have* xsave state? */ @@ -850,11 +849,11 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) * have not enabled. Remember that pcntxt_mask is * what we write to the XCR0 register. */ - WARN_ONCE(!(xfeatures_mask & xstate_feature), + WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)), "get of unsupported state"); /* * This assumes the last 'xsave*' instruction to - * have requested that 'xstate_feature' be saved. + * have requested that 'xfeature_nr' be saved. * If it did not, we might be seeing and old value * of the field in the buffer. * @@ -863,10 +862,9 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) * or because the "init optimization" caused it * to not be saved. */ - if (!(xsave->header.xfeatures & xstate_feature)) + if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr))) return NULL; - xfeature_nr = fls64(xstate_feature) - 1; return __raw_xsave_addr(xsave, xfeature_nr); } EXPORT_SYMBOL_GPL(get_xsave_addr); @@ -882,13 +880,13 @@ EXPORT_SYMBOL_GPL(get_xsave_addr); * Note that this only works on the current task. * * Inputs: - * @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP, - * XFEATURE_MASK_SSE, etc...) + * @xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP, + * XFEATURE_SSE, etc...) * Output: * address of the state in the xsave area or NULL if the state * is not present or is in its 'init state'. */ -const void *get_xsave_field_ptr(int xsave_state) +const void *get_xsave_field_ptr(int xfeature_nr) { struct fpu *fpu = ¤t->thread.fpu; @@ -898,7 +896,7 @@ const void *get_xsave_field_ptr(int xsave_state) */ fpu__save(fpu); - return get_xsave_addr(&fpu->state.xsave, xsave_state); + return get_xsave_addr(&fpu->state.xsave, xfeature_nr); } #ifdef CONFIG_ARCH_HAS_PKEYS diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d26f9e9c3d83..8b6d03e55d2f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -456,7 +456,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) * which is all zeros which indicates MPX was not * responsible for the exception. */ - bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); + bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR); if (!bndcsr) goto exit_trap; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 099b851dabaf..8022e7769b3a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3674,15 +3674,15 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) */ valid = xstate_bv & ~XFEATURE_MASK_FPSSE; while (valid) { - u64 feature = valid & -valid; - int index = fls64(feature) - 1; - void *src = get_xsave_addr(xsave, feature); + u64 xfeature_mask = valid & -valid; + int xfeature_nr = fls64(xfeature_mask) - 1; + void *src = get_xsave_addr(xsave, xfeature_nr); if (src) { u32 size, offset, ecx, edx; - cpuid_count(XSTATE_CPUID, index, + cpuid_count(XSTATE_CPUID, xfeature_nr, &size, &offset, &ecx, &edx); - if (feature == XFEATURE_MASK_PKRU) + if (xfeature_nr == XFEATURE_PKRU) memcpy(dest + offset, &vcpu->arch.pkru, sizeof(vcpu->arch.pkru)); else @@ -3690,7 +3690,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) } - valid -= feature; + valid -= xfeature_mask; } } @@ -3717,22 +3717,22 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) */ valid = xstate_bv & ~XFEATURE_MASK_FPSSE; while (valid) { - u64 feature = valid & -valid; - int index = fls64(feature) - 1; - void *dest = get_xsave_addr(xsave, feature); + u64 xfeature_mask = valid & -valid; + int xfeature_nr = fls64(xfeature_mask) - 1; + void *dest = get_xsave_addr(xsave, xfeature_nr); if (dest) { u32 size, offset, ecx, edx; - cpuid_count(XSTATE_CPUID, index, + cpuid_count(XSTATE_CPUID, xfeature_nr, &size, &offset, &ecx, &edx); - if (feature == XFEATURE_MASK_PKRU) + if (xfeature_nr == XFEATURE_PKRU) memcpy(&vcpu->arch.pkru, src + offset, sizeof(vcpu->arch.pkru)); else memcpy(dest, src + offset, size); } - valid -= feature; + valid -= xfeature_mask; } } @@ -8850,11 +8850,11 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (init_event) kvm_put_guest_fpu(vcpu); mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave, - XFEATURE_MASK_BNDREGS); + XFEATURE_BNDREGS); if (mpx_state_buffer) memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state)); mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave, - XFEATURE_MASK_BNDCSR); + XFEATURE_BNDCSR); if (mpx_state_buffer) memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr)); if (init_event) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index c805db6236b4..59726aaf4671 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -142,7 +142,7 @@ int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs) goto err_out; } /* get bndregs field from current task's xsave area */ - bndregs = get_xsave_field_ptr(XFEATURE_MASK_BNDREGS); + bndregs = get_xsave_field_ptr(XFEATURE_BNDREGS); if (!bndregs) { err = -EINVAL; goto err_out; @@ -190,7 +190,7 @@ static __user void *mpx_get_bounds_dir(void) * The bounds directory pointer is stored in a register * only accessible if we first do an xsave. */ - bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); + bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR); if (!bndcsr) return MPX_INVALID_BOUNDS_DIR; @@ -376,7 +376,7 @@ static int do_mpx_bt_fault(void) const struct mpx_bndcsr *bndcsr; struct mm_struct *mm = current->mm; - bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); + bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR); if (!bndcsr) return -EINVAL; /* -- cgit From 3d45ad9260c35c597706847e196aae8d966a574f Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Wed, 3 Apr 2019 22:12:17 -0400 Subject: x86/ima: add missing include As reported by 0-DAY kernel test infrastructure: arch/x86//kernel/ima_arch.c: In function 'arch_get_ima_policy': >> arch/x86//kernel/ima_arch.c:78:4: error: implicit declaration of function 'set_module_sig_enforced' [-Werror=implicit-function-declaration] Signed-off-by: Mimi Zohar --- arch/x86/kernel/ima_arch.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c index 3fb9847f1cad..85de790583f9 100644 --- a/arch/x86/kernel/ima_arch.c +++ b/arch/x86/kernel/ima_arch.c @@ -3,6 +3,7 @@ * Copyright (C) 2018 IBM Corporation */ #include +#include #include extern struct boot_params boot_params; -- cgit From c806e88734b9e9aea260bf2261c129aa23968fca Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:41 +0200 Subject: x86/pkeys: Provide *pkru() helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dave Hansen asked for __read_pkru() and __write_pkru() to be symmetrical. As part of the series __write_pkru() will read back the value and only write it if it is different. In order to make both functions symmetrical, move the function containing only the opcode asm into a function called like the instruction itself. __write_pkru() will just invoke wrpkru() but in a follow-up patch will also read back the value. [ bp: Convert asm opcode wrapper names to rd/wrpkru(). ] Suggested-by: Dave Hansen Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andi Kleen Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "Jason A. Donenfeld" Cc: Joerg Roedel Cc: Juergen Gross Cc: "Kirill A. Shutemov" Cc: kvm ML Cc: Michal Hocko Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-13-bigeasy@linutronix.de --- arch/x86/include/asm/pgtable.h | 2 +- arch/x86/include/asm/special_insns.h | 12 +++++++++--- arch/x86/kvm/vmx/vmx.c | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 2779ace16d23..e8875ca75623 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -127,7 +127,7 @@ static inline int pte_dirty(pte_t pte) static inline u32 read_pkru(void) { if (boot_cpu_has(X86_FEATURE_OSPKE)) - return __read_pkru(); + return rdpkru(); return 0; } diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 43c029cdc3fe..34897e2b52c9 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -92,7 +92,7 @@ static inline void native_write_cr8(unsigned long val) #endif #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -static inline u32 __read_pkru(void) +static inline u32 rdpkru(void) { u32 ecx = 0; u32 edx, pkru; @@ -107,7 +107,7 @@ static inline u32 __read_pkru(void) return pkru; } -static inline void __write_pkru(u32 pkru) +static inline void wrpkru(u32 pkru) { u32 ecx = 0, edx = 0; @@ -118,8 +118,14 @@ static inline void __write_pkru(u32 pkru) asm volatile(".byte 0x0f,0x01,0xef\n\t" : : "a" (pkru), "c"(ecx), "d"(edx)); } + +static inline void __write_pkru(u32 pkru) +{ + wrpkru(pkru); +} + #else -static inline u32 __read_pkru(void) +static inline u32 rdpkru(void) { return 0; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ab432a930ae8..dd1e1eea4f0b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6501,7 +6501,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) */ if (static_cpu_has(X86_FEATURE_PKU) && kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) { - vcpu->arch.pkru = __read_pkru(); + vcpu->arch.pkru = rdpkru(); if (vcpu->arch.pkru != vmx->host_pkru) __write_pkru(vmx->host_pkru); } -- cgit From 577ff465f5a6a5a0866d75a033844810baca20a0 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:42 +0200 Subject: x86/fpu: Only write PKRU if it is different from current MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to Dave Hansen, WRPKRU is more expensive than RDPKRU. It has a higher cycle cost and it's also practically a (light) speculation barrier. As an optimisation read the current PKRU value and only write the new one if it is different. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "Jason A. Donenfeld" Cc: Juergen Gross Cc: kvm ML Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-14-bigeasy@linutronix.de --- arch/x86/include/asm/special_insns.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 34897e2b52c9..0a3c4cab39db 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -121,6 +121,13 @@ static inline void wrpkru(u32 pkru) static inline void __write_pkru(u32 pkru) { + /* + * WRPKRU is relatively expensive compared to RDPKRU. + * Avoid WRPKRU when it would not change the value. + */ + if (pkru == rdpkru()) + return; + wrpkru(pkru); } -- cgit From 0556cbdc2fbcb3068e5b924a8b3d5386ae0dd27d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:43 +0200 Subject: x86/pkeys: Don't check if PKRU is zero before writing it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit write_pkru() checks if the current value is the same as the expected value. So instead of just checking if the current and new value is zero (and skip the write in such a case) we can benefit from that. Remove the zero check of PKRU, __write_pkru() provides such a check now. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-15-bigeasy@linutronix.de --- arch/x86/mm/pkeys.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index 05bb9a44eb1c..50f65fc1b9a3 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -142,13 +142,6 @@ u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) | void copy_init_pkru_to_fpregs(void) { u32 init_pkru_value_snapshot = READ_ONCE(init_pkru_value); - /* - * Any write to PKRU takes it out of the XSAVE 'init - * state' which increases context switch cost. Avoid - * writing 0 when PKRU was already 0. - */ - if (!init_pkru_value_snapshot && !read_pkru()) - return; /* * Override the PKRU state that came from 'init_fpstate' * with the baseline from the process. -- cgit From 0cecca9d03c964abbd2b7927d0670eb70db4ebf2 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 3 Apr 2019 18:41:44 +0200 Subject: x86/fpu: Eager switch PKRU state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While most of a task's FPU state is only needed in user space, the protection keys need to be in place immediately after a context switch. The reason is that any access to userspace memory while running in kernel mode also needs to abide by the memory permissions specified in the protection keys. The "eager switch" is a preparation for loading the FPU state on return to userland. Instead of decoupling PKRU state from xstate, update PKRU within xstate on write operations by the kernel. For user tasks the PKRU should be always read from the xsave area and it should not change anything because the PKRU value was loaded as part of FPU restore. For kernel threads the default "init_pkru_value" will be written. Before this commit, the kernel thread would end up with a random value which it inherited from the previous user task. [ bigeasy: save pkru to xstate, no cache, don't use __raw_xsave_addr() ] [ bp: update commit message, sort headers properly in asm/fpu/xstate.h ] Signed-off-by: Rik van Riel Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andi Kleen Cc: Andy Lutomirski Cc: Aubrey Li Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: Joerg Roedel Cc: Juergen Gross Cc: "Kirill A. Shutemov" Cc: kvm ML Cc: Michal Hocko Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Radim Krčmář Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-16-bigeasy@linutronix.de --- arch/x86/include/asm/fpu/internal.h | 24 ++++++++++++++++++++++-- arch/x86/include/asm/fpu/xstate.h | 4 +++- arch/x86/include/asm/pgtable.h | 6 ++++++ arch/x86/mm/pkeys.c | 1 - 4 files changed, 31 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 3e0c2c496f2d..6eb4a0b1ad0e 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -534,8 +535,27 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu) */ static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu) { - if (static_cpu_has(X86_FEATURE_FPU)) - __fpregs_load_activate(new_fpu, cpu); + u32 pkru_val = init_pkru_value; + struct pkru_state *pk; + + if (!static_cpu_has(X86_FEATURE_FPU)) + return; + + __fpregs_load_activate(new_fpu, cpu); + + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + + /* + * PKRU state is switched eagerly because it needs to be valid before we + * return to userland e.g. for a copy_to_user() operation. + */ + if (current->mm) { + pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU); + if (pk) + pkru_val = pk->pkru; + } + __write_pkru(pkru_val); } /* diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index fbe41f808e5d..7e42b285c856 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -2,9 +2,11 @@ #ifndef __ASM_X86_XSAVE_H #define __ASM_X86_XSAVE_H +#include #include + #include -#include +#include /* Bit 63 of XCR0 is reserved for future expansion */ #define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63))) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index e8875ca75623..9beb371b1adf 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1355,6 +1355,12 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) #define PKRU_WD_BIT 0x2 #define PKRU_BITS_PER_PKEY 2 +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +extern u32 init_pkru_value; +#else +#define init_pkru_value 0 +#endif + static inline bool __pkru_allows_read(u32 pkru, u16 pkey) { int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index 50f65fc1b9a3..2ecbf4155f98 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -126,7 +126,6 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey * in the process's lifetime will not accidentally get access * to data which is pkey-protected later on. */ -static u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) | PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) | PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) | -- cgit From 383c252545edcc708128e2028a2318b05c45ede4 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:45 +0200 Subject: x86/entry: Add TIF_NEED_FPU_LOAD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TIF_NEED_FPU_LOAD. This flag is used for loading the FPU registers before returning to userland. It must not be set on systems without a FPU. If this flag is cleared, the CPU's FPU registers hold the latest, up-to-date content of the current task's (current()) FPU registers. The in-memory copy (union fpregs_state) is not valid. If this flag is set, then all of CPU's FPU registers may hold a random value (except for PKRU) and it is required to load the content of the FPU registers on return to userland. Introduce it now as a preparatory change before adding the main feature. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Aubrey Li Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: Konrad Rzeszutek Wilk Cc: kvm ML Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: Tim Chen Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-17-bigeasy@linutronix.de --- arch/x86/include/asm/fpu/internal.h | 8 ++++++++ arch/x86/include/asm/thread_info.h | 2 ++ 2 files changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 6eb4a0b1ad0e..da75d7b3e37d 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -508,6 +508,14 @@ static inline void __fpregs_load_activate(struct fpu *fpu, int cpu) * - switch_fpu_finish() restores the new state as * necessary. * + * If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers + * are saved in the current thread's FPU register state. + * + * If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not + * hold current()'s FPU registers. It is required to load the + * registers before returning to userland or using the content + * otherwise. + * * The FPU context is only stored/restored for a user task and * ->mm is used to distinguish between kernel and user threads. */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e0eccbcb8447..f9453536f9bb 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -88,6 +88,7 @@ struct thread_info { #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_PATCH_PENDING 13 /* pending live patching update */ +#define TIF_NEED_FPU_LOAD 14 /* load FPU on return to userspace */ #define TIF_NOCPUID 15 /* CPUID is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ @@ -117,6 +118,7 @@ struct thread_info { #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) +#define _TIF_NEED_FPU_LOAD (1 << TIF_NEED_FPU_LOAD) #define _TIF_NOCPUID (1 << TIF_NOCPUID) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) -- cgit From 69277c98f5eef0d9839699b7825923c3985f665f Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 3 Apr 2019 18:41:46 +0200 Subject: x86/fpu: Always store the registers in copy_fpstate_to_sigframe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit copy_fpstate_to_sigframe() stores the registers directly to user space. This is okay because the FPU registers are valid and saving them directly avoids saving them into kernel memory and making a copy. However, this cannot be done anymore if the FPU registers are going to be restored on the return to userland. It is possible that the FPU registers will be invalidated in the middle of the save operation and this should be done with disabled preemption / BH. Save the FPU registers to the task's FPU struct and copy them to the user memory later on. Signed-off-by: Rik van Riel Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Paolo Bonzini Cc: Radim Krčmář Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-18-bigeasy@linutronix.de --- arch/x86/kernel/fpu/signal.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 155f4552413e..8f23f5237218 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -144,8 +144,8 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) * buf == buf_fx for 64-bit frames and 32-bit fsave frame. * buf != buf_fx for 32-bit frames with fxstate. * - * Save the state directly to the user frame pointed by the aligned pointer - * 'buf_fx'. + * Save the state to task's fpu->state and then copy it to the user frame + * pointed to by the aligned pointer 'buf_fx'. * * If this is a 32-bit frame with fxstate, put a fsave header before * the aligned state at 'buf_fx'. @@ -155,6 +155,8 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) */ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) { + struct fpu *fpu = ¤t->thread.fpu; + struct xregs_state *xsave = &fpu->state.xsave; struct task_struct *tsk = current; int ia32_fxstate = (buf != buf_fx); @@ -169,9 +171,16 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) sizeof(struct user_i387_ia32_struct), NULL, (struct _fpstate_32 __user *) buf) ? -1 : 1; - /* Save the live registers state to the user frame directly. */ - if (copy_fpregs_to_sigframe(buf_fx)) - return -1; + copy_fpregs_to_fpstate(fpu); + + if (using_compacted_format()) { + if (copy_xstate_to_user(buf_fx, xsave, 0, size)) + return -1; + } else { + fpstate_sanitize_xstate(fpu); + if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size)) + return -1; + } /* Save the fsave header for the 32-bit frames. */ if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) -- cgit From a352a3b7b7920212ee4c45a41500c66826318e92 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 3 Apr 2019 18:41:47 +0200 Subject: x86/fpu: Prepare copy_fpstate_to_sigframe() for TIF_NEED_FPU_LOAD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The FPU registers need only to be saved if TIF_NEED_FPU_LOAD is not set. Otherwise this has been already done and can be skipped. [ bp: Massage a bit. ] Signed-off-by: Rik van Riel Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-19-bigeasy@linutronix.de --- arch/x86/kernel/fpu/signal.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 8f23f5237218..9b9dfdc96285 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -171,7 +171,17 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) sizeof(struct user_i387_ia32_struct), NULL, (struct _fpstate_32 __user *) buf) ? -1 : 1; - copy_fpregs_to_fpstate(fpu); + /* + * If we do not need to load the FPU registers at return to userspace + * then the CPU has the current state and we need to save it. Otherwise, + * it has already been done and we can skip it. + */ + fpregs_lock(); + if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { + copy_fpregs_to_fpstate(fpu); + set_thread_flag(TIF_NEED_FPU_LOAD); + } + fpregs_unlock(); if (using_compacted_format()) { if (copy_xstate_to_user(buf_fx, xsave, 0, size)) -- cgit From 0d714dba162620fd8b9f5b3104a487e041353c4d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:48 +0200 Subject: x86/fpu: Update xstate's PKRU value on write_pkru() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the context switch the xstate is loaded which also includes the PKRU value. If xstate is restored on return to userland it is required that the PKRU value in xstate is the same as the one in the CPU. Save the PKRU in xstate during modification. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andi Kleen Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "Jason A. Donenfeld" Cc: Joerg Roedel Cc: Juergen Gross Cc: "Kirill A. Shutemov" Cc: kvm ML Cc: Michal Hocko Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-20-bigeasy@linutronix.de --- arch/x86/include/asm/pgtable.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 9beb371b1adf..5cfbbb6d458d 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -23,6 +23,8 @@ #ifndef __ASSEMBLY__ #include +#include +#include extern pgd_t early_top_pgt[PTRS_PER_PGD]; int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); @@ -133,8 +135,23 @@ static inline u32 read_pkru(void) static inline void write_pkru(u32 pkru) { - if (boot_cpu_has(X86_FEATURE_OSPKE)) - __write_pkru(pkru); + struct pkru_state *pk; + + if (!boot_cpu_has(X86_FEATURE_OSPKE)) + return; + + pk = get_xsave_addr(¤t->thread.fpu.state.xsave, XFEATURE_PKRU); + + /* + * The PKRU value in xstate needs to be in sync with the value that is + * written to the CPU. The FPU restore on return to userland would + * otherwise load the previous value again. + */ + fpregs_lock(); + if (pk) + pk->pkru = pkru; + __write_pkru(pkru); + fpregs_unlock(); } static inline int pte_young(pte_t pte) -- cgit From e0d3602f933367881bddfff310a744e6e61c284c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:49 +0200 Subject: x86/fpu: Inline copy_user_to_fpregs_zeroing() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Start refactoring __fpu__restore_sig() by inlining copy_user_to_fpregs_zeroing(). The original function remains and will be used to restore from userland memory if possible. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-21-bigeasy@linutronix.de --- arch/x86/kernel/fpu/signal.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 9b9dfdc96285..c2ff43fbbd07 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -337,11 +337,29 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) kfree(tmp); return err; } else { + int ret; + /* * For 64-bit frames and 32-bit fsave frames, restore the user * state to the registers directly (with exceptions handled). */ - if (copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only)) { + if (use_xsave()) { + if ((unsigned long)buf_fx % 64 || fx_only) { + u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE; + copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + ret = copy_user_to_fxregs(buf_fx); + } else { + u64 init_bv = xfeatures_mask & ~xfeatures; + if (unlikely(init_bv)) + copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + ret = copy_user_to_xregs(buf_fx, xfeatures); + } + } else if (use_fxsr()) { + ret = copy_user_to_fxregs(buf_fx); + } else + ret = copy_user_to_fregs(buf_fx); + + if (ret) { fpu__clear(fpu); return -1; } -- cgit From 926b21f37b072ae4c117052de45a975c6d468fec Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:50 +0200 Subject: x86/fpu: Restore from kernel memory on the 64-bit path too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 64-bit case (both 64-bit and 32-bit frames) loads the new state from user memory. However, doing this is not desired if the FPU state is going to be restored on return to userland: it would be required to disable preemption in order to avoid a context switch which would set TIF_NEED_FPU_LOAD. If this happens before the restore operation then the loaded registers would become volatile. Furthermore, disabling preemption while accessing user memory requires to disable the pagefault handler. An error during FXRSTOR would then mean that either a page fault occurred (and it would have to be retried with enabled page fault handler) or a #GP occurred because the xstate is bogus (after all, the signal handler can modify it). In order to avoid that mess, copy the FPU state from userland, validate it and then load it. The copy_kernel_…() helpers are basically just like the old helpers except that they operate on kernel memory and the fault handler just sets the error value and the caller handles it. copy_user_to_fpregs_zeroing() and its helpers remain and will be used later for a fastpath optimisation. [ bp: Clarify commit message. ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Aubrey Li Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-22-bigeasy@linutronix.de --- arch/x86/include/asm/fpu/internal.h | 43 +++++++++++++++++++++++++ arch/x86/kernel/fpu/signal.c | 62 +++++++++++++++++++++++++++++-------- 2 files changed, 92 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index da75d7b3e37d..2cf04fbcba5d 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -121,6 +121,21 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu); err; \ }) +#define kernel_insn_err(insn, output, input...) \ +({ \ + int err; \ + asm volatile("1:" #insn "\n\t" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err), output \ + : "0"(0), input); \ + err; \ +}) + #define kernel_insn(insn, output, input...) \ asm volatile("1:" #insn "\n\t" \ "2:\n" \ @@ -149,6 +164,14 @@ static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } +static inline int copy_kernel_to_fxregs_err(struct fxregs_state *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else + return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) { if (IS_ENABLED(CONFIG_X86_32)) @@ -162,6 +185,11 @@ static inline void copy_kernel_to_fregs(struct fregs_state *fx) kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } +static inline int copy_kernel_to_fregs_err(struct fregs_state *fx) +{ + return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + static inline int copy_user_to_fregs(struct fregs_state __user *fx) { return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); @@ -361,6 +389,21 @@ static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) return err; } +/* + * Restore xstate from kernel space xsave area, return an error code instead of + * an exception. + */ +static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + int err; + + XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); + + return err; +} + /* * These must be called with preempt disabled. Returns * 'true' if the FPU state is still intact and we can diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index c2ff43fbbd07..9ea1eaa4c9b1 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -234,7 +234,8 @@ sanitize_restored_xstate(union fpregs_state *state, */ xsave->i387.mxcsr &= mxcsr_feature_mask; - convert_to_fxsr(&state->fxsave, ia32_env); + if (ia32_env) + convert_to_fxsr(&state->fxsave, ia32_env); } } @@ -337,28 +338,63 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) kfree(tmp); return err; } else { + union fpregs_state *state; + void *tmp; int ret; + tmp = kzalloc(sizeof(*state) + fpu_kernel_xstate_size + 64, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + state = PTR_ALIGN(tmp, 64); + /* * For 64-bit frames and 32-bit fsave frames, restore the user * state to the registers directly (with exceptions handled). */ - if (use_xsave()) { - if ((unsigned long)buf_fx % 64 || fx_only) { - u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE; - copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); - ret = copy_user_to_fxregs(buf_fx); + if ((unsigned long)buf_fx % 64) + fx_only = 1; + + if (use_xsave() && !fx_only) { + u64 init_bv = xfeatures_mask & ~xfeatures; + + if (using_compacted_format()) { + ret = copy_user_to_xstate(&state->xsave, buf_fx); } else { - u64 init_bv = xfeatures_mask & ~xfeatures; - if (unlikely(init_bv)) - copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); - ret = copy_user_to_xregs(buf_fx, xfeatures); + ret = __copy_from_user(&state->xsave, buf_fx, state_size); + + if (!ret && state_size > offsetof(struct xregs_state, header)) + ret = validate_xstate_header(&state->xsave.header); } + if (ret) + goto err_out; + + sanitize_restored_xstate(state, NULL, xfeatures, fx_only); + + if (unlikely(init_bv)) + copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + ret = copy_kernel_to_xregs_err(&state->xsave, xfeatures); + } else if (use_fxsr()) { - ret = copy_user_to_fxregs(buf_fx); - } else - ret = copy_user_to_fregs(buf_fx); + ret = __copy_from_user(&state->fxsave, buf_fx, state_size); + if (ret) + goto err_out; + if (use_xsave()) { + u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE; + copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + } + state->fxsave.mxcsr &= mxcsr_feature_mask; + + ret = copy_kernel_to_fxregs_err(&state->fxsave); + } else { + ret = __copy_from_user(&state->fsave, buf_fx, state_size); + if (ret) + goto err_out; + ret = copy_kernel_to_fregs_err(&state->fsave); + } + +err_out: + kfree(tmp); if (ret) { fpu__clear(fpu); return -1; -- cgit From c2ff9e9a3d9d6c019394a22989a228d02970a8b1 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:51 +0200 Subject: x86/fpu: Merge the two code paths in __fpu__restore_sig() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ia32_fxstate case (32bit with fxsr) and the other (64bit frames or 32bit frames without fxsr) restore both from kernel memory and sanitize the content. The !ia32_fxstate version restores missing xstates from "init state" while the ia32_fxstate doesn't and skips it. Merge the two code paths and keep the !ia32_fxstate one. Copy only the user_i387_ia32_struct data structure in the ia32_fxstate. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-23-bigeasy@linutronix.de --- arch/x86/kernel/fpu/signal.c | 139 +++++++++++++++++-------------------------- 1 file changed, 54 insertions(+), 85 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 9ea1eaa4c9b1..b13e86b29426 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -263,12 +263,17 @@ static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) { + struct user_i387_ia32_struct *envp = NULL; + int state_size = fpu_kernel_xstate_size; int ia32_fxstate = (buf != buf_fx); struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; - int state_size = fpu_kernel_xstate_size; + struct user_i387_ia32_struct env; + union fpregs_state *state; u64 xfeatures = 0; int fx_only = 0; + int ret = 0; + void *tmp; ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) || IS_ENABLED(CONFIG_IA32_EMULATION)); @@ -303,105 +308,69 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) } } + tmp = kzalloc(sizeof(*state) + fpu_kernel_xstate_size + 64, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + state = PTR_ALIGN(tmp, 64); + + if ((unsigned long)buf_fx % 64) + fx_only = 1; + + /* + * For 32-bit frames with fxstate, copy the fxstate so it can be + * reconstructed later. + */ if (ia32_fxstate) { - /* - * For 32-bit frames with fxstate, copy the user state to the - * thread's fpu state, reconstruct fxstate from the fsave - * header. Validate and sanitize the copied state. - */ - struct user_i387_ia32_struct env; - union fpregs_state *state; - int err = 0; - void *tmp; + ret = __copy_from_user(&env, buf, sizeof(env)); + if (ret) + goto err_out; + envp = &env; + } - tmp = kzalloc(sizeof(*state) + fpu_kernel_xstate_size + 64, GFP_KERNEL); - if (!tmp) - return -ENOMEM; - state = PTR_ALIGN(tmp, 64); + if (use_xsave() && !fx_only) { + u64 init_bv = xfeatures_mask & ~xfeatures; if (using_compacted_format()) { - err = copy_user_to_xstate(&state->xsave, buf_fx); + ret = copy_user_to_xstate(&state->xsave, buf_fx); } else { - err = __copy_from_user(&state->xsave, buf_fx, state_size); + ret = __copy_from_user(&state->xsave, buf_fx, state_size); - if (!err && state_size > offsetof(struct xregs_state, header)) - err = validate_xstate_header(&state->xsave.header); + if (!ret && state_size > offsetof(struct xregs_state, header)) + ret = validate_xstate_header(&state->xsave.header); } + if (ret) + goto err_out; - if (err || __copy_from_user(&env, buf, sizeof(env))) { - err = -1; - } else { - sanitize_restored_xstate(state, &env, xfeatures, fx_only); - copy_kernel_to_fpregs(state); - } - - kfree(tmp); - return err; - } else { - union fpregs_state *state; - void *tmp; - int ret; - - tmp = kzalloc(sizeof(*state) + fpu_kernel_xstate_size + 64, GFP_KERNEL); - if (!tmp) - return -ENOMEM; - state = PTR_ALIGN(tmp, 64); - - /* - * For 64-bit frames and 32-bit fsave frames, restore the user - * state to the registers directly (with exceptions handled). - */ - if ((unsigned long)buf_fx % 64) - fx_only = 1; - - if (use_xsave() && !fx_only) { - u64 init_bv = xfeatures_mask & ~xfeatures; - - if (using_compacted_format()) { - ret = copy_user_to_xstate(&state->xsave, buf_fx); - } else { - ret = __copy_from_user(&state->xsave, buf_fx, state_size); - - if (!ret && state_size > offsetof(struct xregs_state, header)) - ret = validate_xstate_header(&state->xsave.header); - } - if (ret) - goto err_out; - - sanitize_restored_xstate(state, NULL, xfeatures, fx_only); - - if (unlikely(init_bv)) - copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); - ret = copy_kernel_to_xregs_err(&state->xsave, xfeatures); + sanitize_restored_xstate(state, envp, xfeatures, fx_only); - } else if (use_fxsr()) { - ret = __copy_from_user(&state->fxsave, buf_fx, state_size); - if (ret) - goto err_out; + if (unlikely(init_bv)) + copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + ret = copy_kernel_to_xregs_err(&state->xsave, xfeatures); - if (use_xsave()) { - u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE; - copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); - } - state->fxsave.mxcsr &= mxcsr_feature_mask; + } else if (use_fxsr()) { + ret = __copy_from_user(&state->fxsave, buf_fx, state_size); + if (ret) + goto err_out; - ret = copy_kernel_to_fxregs_err(&state->fxsave); - } else { - ret = __copy_from_user(&state->fsave, buf_fx, state_size); - if (ret) - goto err_out; - ret = copy_kernel_to_fregs_err(&state->fsave); + sanitize_restored_xstate(state, envp, xfeatures, fx_only); + if (use_xsave()) { + u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE; + copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); } -err_out: - kfree(tmp); - if (ret) { - fpu__clear(fpu); - return -1; - } + ret = copy_kernel_to_fxregs_err(&state->fxsave); + } else { + ret = __copy_from_user(&state->fsave, buf_fx, state_size); + if (ret) + goto err_out; + ret = copy_kernel_to_fregs_err(&state->fsave); } - return 0; +err_out: + kfree(tmp); + if (ret) + fpu__clear(fpu); + return ret; } static inline int xstate_sigframe_size(void) -- cgit From 5f409e20b794565e2d60ad333e79334630a6c798 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 3 Apr 2019 18:41:52 +0200 Subject: x86/fpu: Defer FPU state load until return to userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Defer loading of FPU state until return to userspace. This gives the kernel the potential to skip loading FPU state for tasks that stay in kernel mode, or for tasks that end up with repeated invocations of kernel_fpu_begin() & kernel_fpu_end(). The fpregs_lock/unlock() section ensures that the registers remain unchanged. Otherwise a context switch or a bottom half could save the registers to its FPU context and the processor's FPU registers would became random if modified at the same time. KVM swaps the host/guest registers on entry/exit path. This flow has been kept as is. First it ensures that the registers are loaded and then saves the current (host) state before it loads the guest's registers. The swap is done at the very end with disabled interrupts so it should not change anymore before theg guest is entered. The read/save version seems to be cheaper compared to memcpy() in a micro benchmark. Each thread gets TIF_NEED_FPU_LOAD set as part of fork() / fpu__copy(). For kernel threads, this flag gets never cleared which avoids saving / restoring the FPU state for kernel threads and during in-kernel usage of the FPU registers. [ bp: Correct and update commit message and fix checkpatch warnings. s/register/registers/ where it is used in plural. minor comment corrections. remove unused trace_x86_fpu_activate_state() TP. ] Signed-off-by: Rik van Riel Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Aubrey Li Cc: Babu Moger Cc: "Chang S. Bae" Cc: Dmitry Safonov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: kvm ML Cc: Nicolai Stange Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: Tim Chen Cc: Waiman Long Cc: x86-ml Cc: Yi Wang Link: https://lkml.kernel.org/r/20190403164156.19645-24-bigeasy@linutronix.de --- arch/x86/entry/common.c | 10 +++- arch/x86/include/asm/fpu/api.h | 22 +++++++- arch/x86/include/asm/fpu/internal.h | 27 +++++---- arch/x86/include/asm/trace/fpu.h | 10 ++-- arch/x86/kernel/fpu/core.c | 106 +++++++++++++++++++++++++++--------- arch/x86/kernel/fpu/signal.c | 49 ++++++++++------- arch/x86/kernel/process.c | 2 +- arch/x86/kernel/process_32.c | 5 +- arch/x86/kernel/process_64.c | 5 +- arch/x86/kvm/x86.c | 20 +++++-- 10 files changed, 184 insertions(+), 72 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 7bc105f47d21..51beb8d29123 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -25,12 +25,13 @@ #include #include #include +#include #include #include #include -#include #include +#include #define CREATE_TRACE_POINTS #include @@ -196,6 +197,13 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) exit_to_usermode_loop(regs, cached_flags); + /* Reload ti->flags; we may have rescheduled above. */ + cached_flags = READ_ONCE(ti->flags); + + fpregs_assert_state_consistent(); + if (unlikely(cached_flags & _TIF_NEED_FPU_LOAD)) + switch_fpu_return(); + #ifdef CONFIG_COMPAT /* * Compat syscalls set TS_COMPAT. Make sure we clear it before diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 73e684160f35..b774c52e5411 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -10,7 +10,7 @@ #ifndef _ASM_X86_FPU_API_H #define _ASM_X86_FPU_API_H -#include +#include /* * Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It @@ -22,17 +22,37 @@ extern void kernel_fpu_begin(void); extern void kernel_fpu_end(void); extern bool irq_fpu_usable(void); +extern void fpregs_mark_activate(void); +/* + * Use fpregs_lock() while editing CPU's FPU registers or fpu->state. + * A context switch will (and softirq might) save CPU's FPU registers to + * fpu->state and set TIF_NEED_FPU_LOAD leaving CPU's FPU registers in + * a random state. + */ static inline void fpregs_lock(void) { preempt_disable(); + local_bh_disable(); } static inline void fpregs_unlock(void) { + local_bh_enable(); preempt_enable(); } +#ifdef CONFIG_X86_DEBUG_FPU +extern void fpregs_assert_state_consistent(void); +#else +static inline void fpregs_assert_state_consistent(void) { } +#endif + +/* + * Load the task FPU state before returning to userspace. + */ +extern void switch_fpu_return(void); + /* * Query the presence of one or more xfeatures. Works on any legacy CPU as well. * diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 2cf04fbcba5d..0c8a5093647a 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -30,7 +30,7 @@ extern void fpu__prepare_write(struct fpu *fpu); extern void fpu__save(struct fpu *fpu); extern int fpu__restore_sig(void __user *buf, int ia32_frame); extern void fpu__drop(struct fpu *fpu); -extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu); +extern int fpu__copy(struct task_struct *dst, struct task_struct *src); extern void fpu__clear(struct fpu *fpu); extern int fpu__exception_code(struct fpu *fpu, int trap_nr); extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate); @@ -531,13 +531,20 @@ static inline void fpregs_activate(struct fpu *fpu) /* * Internal helper, do not use directly. Use switch_fpu_return() instead. */ -static inline void __fpregs_load_activate(struct fpu *fpu, int cpu) +static inline void __fpregs_load_activate(void) { + struct fpu *fpu = ¤t->thread.fpu; + int cpu = smp_processor_id(); + + if (WARN_ON_ONCE(current->mm == NULL)) + return; + if (!fpregs_state_valid(fpu, cpu)) { - if (current->mm) - copy_kernel_to_fpregs(&fpu->state); + copy_kernel_to_fpregs(&fpu->state); fpregs_activate(fpu); + fpu->last_cpu = cpu; } + clear_thread_flag(TIF_NEED_FPU_LOAD); } /* @@ -548,8 +555,8 @@ static inline void __fpregs_load_activate(struct fpu *fpu, int cpu) * - switch_fpu_prepare() saves the old state. * This is done within the context of the old process. * - * - switch_fpu_finish() restores the new state as - * necessary. + * - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state + * will get loaded on return to userspace, or when the kernel needs it. * * If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers * are saved in the current thread's FPU register state. @@ -581,10 +588,10 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu) */ /* - * Set up the userspace FPU context for the new task, if the task - * has used the FPU. + * Load PKRU from the FPU context if available. Delay loading of the + * complete FPU state until the return to userland. */ -static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu) +static inline void switch_fpu_finish(struct fpu *new_fpu) { u32 pkru_val = init_pkru_value; struct pkru_state *pk; @@ -592,7 +599,7 @@ static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu) if (!static_cpu_has(X86_FEATURE_FPU)) return; - __fpregs_load_activate(new_fpu, cpu); + set_thread_flag(TIF_NEED_FPU_LOAD); if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return; diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h index bd65f6ba950f..879b77792f94 100644 --- a/arch/x86/include/asm/trace/fpu.h +++ b/arch/x86/include/asm/trace/fpu.h @@ -13,19 +13,22 @@ DECLARE_EVENT_CLASS(x86_fpu, TP_STRUCT__entry( __field(struct fpu *, fpu) + __field(bool, load_fpu) __field(u64, xfeatures) __field(u64, xcomp_bv) ), TP_fast_assign( __entry->fpu = fpu; + __entry->load_fpu = test_thread_flag(TIF_NEED_FPU_LOAD); if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { __entry->xfeatures = fpu->state.xsave.header.xfeatures; __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; } ), - TP_printk("x86/fpu: %p xfeatures: %llx xcomp_bv: %llx", + TP_printk("x86/fpu: %p load: %d xfeatures: %llx xcomp_bv: %llx", __entry->fpu, + __entry->load_fpu, __entry->xfeatures, __entry->xcomp_bv ) @@ -61,11 +64,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated, TP_ARGS(fpu) ); -DEFINE_EVENT(x86_fpu, x86_fpu_activate_state, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - DEFINE_EVENT(x86_fpu, x86_fpu_init_state, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 739ca3ae2bdc..ce243f76bdb7 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -102,23 +102,20 @@ static void __kernel_fpu_begin(void) kernel_fpu_disable(); if (current->mm) { - /* - * Ignore return value -- we don't care if reg state - * is clobbered. - */ - copy_fpregs_to_fpstate(fpu); - } else { - __cpu_invalidate_fpregs_state(); + if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { + set_thread_flag(TIF_NEED_FPU_LOAD); + /* + * Ignore return value -- we don't care if reg state + * is clobbered. + */ + copy_fpregs_to_fpstate(fpu); + } } + __cpu_invalidate_fpregs_state(); } static void __kernel_fpu_end(void) { - struct fpu *fpu = ¤t->thread.fpu; - - if (current->mm) - copy_kernel_to_fpregs(&fpu->state); - kernel_fpu_enable(); } @@ -145,14 +142,17 @@ void fpu__save(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); - preempt_disable(); + fpregs_lock(); trace_x86_fpu_before_save(fpu); - if (!copy_fpregs_to_fpstate(fpu)) - copy_kernel_to_fpregs(&fpu->state); + if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { + if (!copy_fpregs_to_fpstate(fpu)) { + copy_kernel_to_fpregs(&fpu->state); + } + } trace_x86_fpu_after_save(fpu); - preempt_enable(); + fpregs_unlock(); } EXPORT_SYMBOL_GPL(fpu__save); @@ -185,8 +185,11 @@ void fpstate_init(union fpregs_state *state) } EXPORT_SYMBOL_GPL(fpstate_init); -int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) +int fpu__copy(struct task_struct *dst, struct task_struct *src) { + struct fpu *dst_fpu = &dst->thread.fpu; + struct fpu *src_fpu = &src->thread.fpu; + dst_fpu->last_cpu = -1; if (!static_cpu_has(X86_FEATURE_FPU)) @@ -201,16 +204,23 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); /* - * Save current FPU registers directly into the child - * FPU context, without any memory-to-memory copying. + * If the FPU registers are not current just memcpy() the state. + * Otherwise save current FPU registers directly into the child's FPU + * context, without any memory-to-memory copying. * * ( The function 'fails' in the FNSAVE case, which destroys - * register contents so we have to copy them back. ) + * register contents so we have to load them back. ) */ - if (!copy_fpregs_to_fpstate(dst_fpu)) { - memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size); - copy_kernel_to_fpregs(&src_fpu->state); - } + fpregs_lock(); + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size); + + else if (!copy_fpregs_to_fpstate(dst_fpu)) + copy_kernel_to_fpregs(&dst_fpu->state); + + fpregs_unlock(); + + set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD); trace_x86_fpu_copy_src(src_fpu); trace_x86_fpu_copy_dst(dst_fpu); @@ -226,10 +236,9 @@ static void fpu__initialize(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); + set_thread_flag(TIF_NEED_FPU_LOAD); fpstate_init(&fpu->state); trace_x86_fpu_init_state(fpu); - - trace_x86_fpu_activate_state(fpu); } /* @@ -308,6 +317,8 @@ void fpu__drop(struct fpu *fpu) */ static inline void copy_init_fpstate_to_fpregs(void) { + fpregs_lock(); + if (use_xsave()) copy_kernel_to_xregs(&init_fpstate.xsave, -1); else if (static_cpu_has(X86_FEATURE_FXSR)) @@ -317,6 +328,9 @@ static inline void copy_init_fpstate_to_fpregs(void) if (boot_cpu_has(X86_FEATURE_OSPKE)) copy_init_pkru_to_fpregs(); + + fpregs_mark_activate(); + fpregs_unlock(); } /* @@ -339,6 +353,46 @@ void fpu__clear(struct fpu *fpu) copy_init_fpstate_to_fpregs(); } +/* + * Load FPU context before returning to userspace. + */ +void switch_fpu_return(void) +{ + if (!static_cpu_has(X86_FEATURE_FPU)) + return; + + __fpregs_load_activate(); +} +EXPORT_SYMBOL_GPL(switch_fpu_return); + +#ifdef CONFIG_X86_DEBUG_FPU +/* + * If current FPU state according to its tracking (loaded FPU context on this + * CPU) is not valid then we must have TIF_NEED_FPU_LOAD set so the context is + * loaded on return to userland. + */ +void fpregs_assert_state_consistent(void) +{ + struct fpu *fpu = ¤t->thread.fpu; + + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + return; + + WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id())); +} +EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent); +#endif + +void fpregs_mark_activate(void) +{ + struct fpu *fpu = ¤t->thread.fpu; + + fpregs_activate(fpu); + fpu->last_cpu = smp_processor_id(); + clear_thread_flag(TIF_NEED_FPU_LOAD); +} +EXPORT_SYMBOL_GPL(fpregs_mark_activate); + /* * x87 math exception handling: */ diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index b13e86b29426..6df1f15e0cd5 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -269,11 +269,9 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; - union fpregs_state *state; u64 xfeatures = 0; int fx_only = 0; int ret = 0; - void *tmp; ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) || IS_ENABLED(CONFIG_IA32_EMULATION)); @@ -308,14 +306,18 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) } } - tmp = kzalloc(sizeof(*state) + fpu_kernel_xstate_size + 64, GFP_KERNEL); - if (!tmp) - return -ENOMEM; - state = PTR_ALIGN(tmp, 64); + /* + * The current state of the FPU registers does not matter. By setting + * TIF_NEED_FPU_LOAD unconditionally it is ensured that the our xstate + * is not modified on context switch and that the xstate is considered + * to be loaded again on return to userland (overriding last_cpu avoids + * the optimisation). + */ + set_thread_flag(TIF_NEED_FPU_LOAD); + __fpu_invalidate_fpregs_state(fpu); if ((unsigned long)buf_fx % 64) fx_only = 1; - /* * For 32-bit frames with fxstate, copy the fxstate so it can be * reconstructed later. @@ -331,43 +333,52 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) u64 init_bv = xfeatures_mask & ~xfeatures; if (using_compacted_format()) { - ret = copy_user_to_xstate(&state->xsave, buf_fx); + ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx); } else { - ret = __copy_from_user(&state->xsave, buf_fx, state_size); + ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); if (!ret && state_size > offsetof(struct xregs_state, header)) - ret = validate_xstate_header(&state->xsave.header); + ret = validate_xstate_header(&fpu->state.xsave.header); } if (ret) goto err_out; - sanitize_restored_xstate(state, envp, xfeatures, fx_only); + sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only); + fpregs_lock(); if (unlikely(init_bv)) copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); - ret = copy_kernel_to_xregs_err(&state->xsave, xfeatures); + ret = copy_kernel_to_xregs_err(&fpu->state.xsave, xfeatures); } else if (use_fxsr()) { - ret = __copy_from_user(&state->fxsave, buf_fx, state_size); - if (ret) + ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size); + if (ret) { + ret = -EFAULT; goto err_out; + } + + sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only); - sanitize_restored_xstate(state, envp, xfeatures, fx_only); + fpregs_lock(); if (use_xsave()) { u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE; copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); } - ret = copy_kernel_to_fxregs_err(&state->fxsave); + ret = copy_kernel_to_fxregs_err(&fpu->state.fxsave); } else { - ret = __copy_from_user(&state->fsave, buf_fx, state_size); + ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size); if (ret) goto err_out; - ret = copy_kernel_to_fregs_err(&state->fsave); + + fpregs_lock(); + ret = copy_kernel_to_fregs_err(&fpu->state.fsave); } + if (!ret) + fpregs_mark_activate(); + fpregs_unlock(); err_out: - kfree(tmp); if (ret) fpu__clear(fpu); return ret; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 58ac7be52c7a..b9b8e6eb74f2 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -101,7 +101,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) dst->thread.vm86 = NULL; #endif - return fpu__copy(&dst->thread.fpu, &src->thread.fpu); + return fpu__copy(dst, src); } /* diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 77d9eb43ccac..1bc47f3a4885 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -234,7 +234,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ - switch_fpu_prepare(prev_fpu, cpu); + if (!test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_prepare(prev_fpu, cpu); /* * Save away %gs. No need to save %fs, as it was saved on the @@ -290,7 +291,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) this_cpu_write(current_task, next_p); - switch_fpu_finish(next_fpu, cpu); + switch_fpu_finish(next_fpu); /* Load the Intel cache allocation PQR MSR. */ resctrl_sched_in(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ffea7c557963..37b2ecef041e 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -520,7 +520,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(irq_count) != -1); - switch_fpu_prepare(prev_fpu, cpu); + if (!test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_prepare(prev_fpu, cpu); /* We must save %fs and %gs before load_TLS() because * %fs and %gs may be cleared by load_TLS(). @@ -572,7 +573,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) this_cpu_write(current_task, next_p); this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); - switch_fpu_finish(next_fpu, cpu); + switch_fpu_finish(next_fpu); /* Reload sp0. */ update_task_stack(next_p); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8022e7769b3a..e340c3c0cba3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7877,6 +7877,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) wait_lapic_expire(vcpu); guest_enter_irqoff(); + fpregs_assert_state_consistent(); + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_return(); + if (unlikely(vcpu->arch.switch_db_regs)) { set_debugreg(0, 7); set_debugreg(vcpu->arch.eff_db[0], 0); @@ -8137,22 +8141,30 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) /* Swap (qemu) user FPU context for the guest FPU context. */ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { - preempt_disable(); + fpregs_lock(); + copy_fpregs_to_fpstate(¤t->thread.fpu); /* PKRU is separately restored in kvm_x86_ops->run. */ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, ~XFEATURE_MASK_PKRU); - preempt_enable(); + + fpregs_mark_activate(); + fpregs_unlock(); + trace_kvm_fpu(1); } /* When vcpu_run ends, restore user space FPU context. */ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { - preempt_disable(); + fpregs_lock(); + copy_fpregs_to_fpstate(vcpu->arch.guest_fpu); copy_kernel_to_fpregs(¤t->thread.fpu.state); - preempt_enable(); + + fpregs_mark_activate(); + fpregs_unlock(); + ++vcpu->stat.fpu_reload; trace_kvm_fpu(0); } -- cgit From 1d731e731c4cd7cbd3b1aa295f0932e7610da82f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:53 +0200 Subject: x86/fpu: Add a fastpath to __fpu__restore_sig() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commits refactor the restoration of the FPU registers so that they can be loaded from in-kernel memory. This overhead can be avoided if the load can be performed without a pagefault. Attempt to restore FPU registers by invoking copy_user_to_fpregs_zeroing(). If it fails try the slowpath which can handle pagefaults. [ bp: Add a comment over the fastpath to be able to find one's way around the function. ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-25-bigeasy@linutronix.de --- arch/x86/kernel/fpu/signal.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 6df1f15e0cd5..a1bd7be70206 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -242,10 +242,10 @@ sanitize_restored_xstate(union fpregs_state *state, /* * Restore the extended state if present. Otherwise, restore the FP/SSE state. */ -static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) +static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) { if (use_xsave()) { - if ((unsigned long)buf % 64 || fx_only) { + if (fx_only) { u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE; copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); return copy_user_to_fxregs(buf); @@ -327,8 +327,27 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) if (ret) goto err_out; envp = &env; + } else { + /* + * Attempt to restore the FPU registers directly from user + * memory. For that to succeed, the user access cannot cause + * page faults. If it does, fall back to the slow path below, + * going through the kernel buffer with the enabled pagefault + * handler. + */ + fpregs_lock(); + pagefault_disable(); + ret = copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only); + pagefault_enable(); + if (!ret) { + fpregs_mark_activate(); + fpregs_unlock(); + return 0; + } + fpregs_unlock(); } + if (use_xsave() && !fx_only) { u64 init_bv = xfeatures_mask & ~xfeatures; -- cgit From da2f32fb8dc7cbd9433cb2e990693734b30a2465 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:54 +0200 Subject: x86/fpu: Add a fastpath to copy_fpstate_to_sigframe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Try to save the FPU registers directly to the userland stack frame if the CPU holds the FPU registers for the current task. This has to be done with the pagefault disabled because we can't fault (while the FPU registers are locked) and therefore the operation might fail. If it fails try the slowpath which can handle faults. [ bp: Massage a bit. ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-26-bigeasy@linutronix.de --- arch/x86/kernel/fpu/signal.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index a1bd7be70206..3c3167576216 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -144,8 +144,10 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) * buf == buf_fx for 64-bit frames and 32-bit fsave frame. * buf != buf_fx for 32-bit frames with fxstate. * - * Save the state to task's fpu->state and then copy it to the user frame - * pointed to by the aligned pointer 'buf_fx'. + * Try to save it directly to the user frame with disabled page fault handler. + * If this fails then do the slow path where the FPU state is first saved to + * task's fpu->state and then copy it to the user frame pointed to by the + * aligned pointer 'buf_fx'. * * If this is a 32-bit frame with fxstate, put a fsave header before * the aligned state at 'buf_fx'. @@ -159,6 +161,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) struct xregs_state *xsave = &fpu->state.xsave; struct task_struct *tsk = current; int ia32_fxstate = (buf != buf_fx); + int ret = -EFAULT; ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) || IS_ENABLED(CONFIG_IA32_EMULATION)); @@ -173,23 +176,30 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) /* * If we do not need to load the FPU registers at return to userspace - * then the CPU has the current state and we need to save it. Otherwise, - * it has already been done and we can skip it. + * then the CPU has the current state. Try to save it directly to + * userland's stack frame if it does not cause a pagefault. If it does, + * try the slowpath. */ fpregs_lock(); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { - copy_fpregs_to_fpstate(fpu); + pagefault_disable(); + ret = copy_fpregs_to_sigframe(buf_fx); + pagefault_enable(); + if (ret) + copy_fpregs_to_fpstate(fpu); set_thread_flag(TIF_NEED_FPU_LOAD); } fpregs_unlock(); - if (using_compacted_format()) { - if (copy_xstate_to_user(buf_fx, xsave, 0, size)) - return -1; - } else { - fpstate_sanitize_xstate(fpu); - if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size)) - return -1; + if (ret) { + if (using_compacted_format()) { + if (copy_xstate_to_user(buf_fx, xsave, 0, size)) + return -1; + } else { + fpstate_sanitize_xstate(fpu); + if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size)) + return -1; + } } /* Save the fsave header for the 32-bit frames. */ -- cgit From 06b251dff78704c7d122bd109384d970a7dbe94d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 12 Apr 2019 20:16:15 +0200 Subject: x86/fpu: Restore regs in copy_fpstate_to_sigframe() in order to use the fastpath MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a task is scheduled out and receives a signal then it won't be able to take the fastpath because the registers aren't available. The slowpath is more expensive compared to XRSTOR + XSAVE which usually succeeds. Here are some clock_gettime() numbers from a bigger box with AVX512 during bootup: - __fpregs_load_activate() takes 140ns - 350ns. If it was the most recent FPU context on the CPU then the optimisation in __fpregs_load_activate() will skip the load (which was disabled during the test). - copy_fpregs_to_sigframe() takes 200ns - 450ns if it succeeds. On a pagefault it is 1.8us - 3us usually in the 2.6us area. - The slowpath takes 1.5us - 6us. Usually in the 2.6us area. My testcases (including lat_sig) take the fastpath without __fpregs_load_activate(). I expect this to be the majority. Since the slowpath is in the >1us area it makes sense to load the registers and attempt to save them directly. The direct save may fail but should only happen on the first invocation or after fork() while the page is read-only. [ bp: Massage a bit. ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-27-bigeasy@linutronix.de --- arch/x86/kernel/fpu/signal.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 3c3167576216..7026f1c4e5e3 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -175,20 +175,21 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) (struct _fpstate_32 __user *) buf) ? -1 : 1; /* - * If we do not need to load the FPU registers at return to userspace - * then the CPU has the current state. Try to save it directly to - * userland's stack frame if it does not cause a pagefault. If it does, - * try the slowpath. + * Load the FPU registers if they are not valid for the current task. + * With a valid FPU state we can attempt to save the state directly to + * userland's stack frame which will likely succeed. If it does not, do + * the slowpath. */ fpregs_lock(); - if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { - pagefault_disable(); - ret = copy_fpregs_to_sigframe(buf_fx); - pagefault_enable(); - if (ret) - copy_fpregs_to_fpstate(fpu); - set_thread_flag(TIF_NEED_FPU_LOAD); - } + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + __fpregs_load_activate(); + + pagefault_disable(); + ret = copy_fpregs_to_sigframe(buf_fx); + pagefault_enable(); + if (ret && !test_thread_flag(TIF_NEED_FPU_LOAD)) + copy_fpregs_to_fpstate(fpu); + set_thread_flag(TIF_NEED_FPU_LOAD); fpregs_unlock(); if (ret) { -- cgit From a5eff7259790d5314eff10563d6e59d358cce482 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 3 Apr 2019 18:41:56 +0200 Subject: x86/pkeys: Add PKRU value to init_fpstate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The task's initial PKRU value is set partly for fpu__clear()/ copy_init_pkru_to_fpregs(). It is not part of init_fpstate.xsave and instead it is set explicitly. If the user removes the PKRU state from XSAVE in the signal handler then __fpu__restore_sig() will restore the missing bits from `init_fpstate' and initialize the PKRU value to 0. Add the `init_pkru_value' to `init_fpstate' so it is set to the init value in such a case. In theory copy_init_pkru_to_fpregs() could be removed because restoring the PKRU at return-to-userland should be enough. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andi Kleen Cc: Andy Lutomirski Cc: "Chang S. Bae" Cc: Dominik Brodowski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "Jason A. Donenfeld" Cc: Konrad Rzeszutek Wilk Cc: kvm ML Cc: Paolo Bonzini Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Rik van Riel Cc: x86-ml Link: https://lkml.kernel.org/r/20190403164156.19645-28-bigeasy@linutronix.de --- arch/x86/kernel/cpu/common.c | 5 +++++ arch/x86/mm/pkeys.c | 6 ++++++ 2 files changed, 11 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cb28e98a0659..352fa19e6311 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -372,6 +372,8 @@ static bool pku_disabled; static __always_inline void setup_pku(struct cpuinfo_x86 *c) { + struct pkru_state *pk; + /* check the boot processor, plus compile options for PKU: */ if (!cpu_feature_enabled(X86_FEATURE_PKU)) return; @@ -382,6 +384,9 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c) return; cr4_set_bits(X86_CR4_PKE); + pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU); + if (pk) + pk->pkru = init_pkru_value; /* * Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE * cpuid bit to be set. We need to ensure that we diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index 2ecbf4155f98..1dcfc91c8f0c 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -18,6 +18,7 @@ #include /* boot_cpu_has, ... */ #include /* vma_pkey() */ +#include /* init_fpstate */ int __execute_only_pkey(struct mm_struct *mm) { @@ -161,6 +162,7 @@ static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf, static ssize_t init_pkru_write_file(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { + struct pkru_state *pk; char buf[32]; ssize_t len; u32 new_init_pkru; @@ -183,6 +185,10 @@ static ssize_t init_pkru_write_file(struct file *file, return -EINVAL; WRITE_ONCE(init_pkru_value, new_init_pkru); + pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU); + if (!pk) + return -EINVAL; + pk->pkru = new_init_pkru; return count; } -- cgit From cae5ec342645746d617dd420d206e1588d47768a Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Fri, 12 Apr 2019 17:50:57 -0400 Subject: x86/speculation/mds: Fix comment s/L1TF/MDS/ Signed-off-by: Boris Ostrovsky Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Tyler Hicks Reviewed-by: Josh Poimboeuf --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 22a14d4b68a2..0642505dda69 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -221,7 +221,7 @@ static void x86_amd_ssb_disable(void) #undef pr_fmt #define pr_fmt(fmt) "MDS: " fmt -/* Default mitigation for L1TF-affected CPUs */ +/* Default mitigation for MDS-affected CPUs */ static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL; static bool mds_nosmt __ro_after_init = false; -- cgit From e2c3c94788b08891dcf3dbe608f9880523ecd71b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 12 Apr 2019 17:50:58 -0400 Subject: x86/speculation/mds: Print SMT vulnerable on MSBDS with mitigations off This code is only for CPUs which are affected by MSBDS, but are *not* affected by the other two MDS issues. For such CPUs, enabling the mds_idle_clear mitigation is enough to mitigate SMT. However if user boots with 'mds=off' and still has SMT enabled, we should not report that SMT is mitigated: $cat /sys//devices/system/cpu/vulnerabilities/mds Vulnerable; SMT mitigated But rather: Vulnerable; SMT vulnerable Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Tyler Hicks Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20190412215118.294906495@localhost.localdomain --- arch/x86/kernel/cpu/bugs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 0642505dda69..6b8a55c7cebc 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1204,7 +1204,8 @@ static ssize_t mds_show_state(char *buf) if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) { return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], - sched_smt_active() ? "mitigated" : "disabled"); + (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" : + sched_smt_active() ? "mitigated" : "disabled")); } return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], -- cgit From ea094d53580f40c2124cef3d072b73b2425e7bfd Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Wed, 17 Apr 2019 09:18:50 -0500 Subject: x86/PCI: Fix PCI IRQ routing table memory leak In pcibios_irq_init(), the PCI IRQ routing table 'pirq_table' is first found through pirq_find_routing_table(). If the table is not found and CONFIG_PCI_BIOS is defined, the table is then allocated in pcibios_get_irq_routing_table() using kmalloc(). Later, if the I/O APIC is used, this table is actually not used. In that case, the allocated table is not freed, which is a memory leak. Free the allocated table if it is not used. Signed-off-by: Wenwen Wang [bhelgaas: added Ingo's reviewed-by, since the only change since v1 was to use the irq_routing_table local variable name he suggested] Signed-off-by: Bjorn Helgaas Reviewed-by: Ingo Molnar Acked-by: Thomas Gleixner --- arch/x86/pci/irq.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 52e55108404e..d3a73f9335e1 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -1119,6 +1119,8 @@ static const struct dmi_system_id pciirq_dmi_table[] __initconst = { void __init pcibios_irq_init(void) { + struct irq_routing_table *rtable = NULL; + DBG(KERN_DEBUG "PCI: IRQ init\n"); if (raw_pci_ops == NULL) @@ -1129,8 +1131,10 @@ void __init pcibios_irq_init(void) pirq_table = pirq_find_routing_table(); #ifdef CONFIG_PCI_BIOS - if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN)) + if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN)) { pirq_table = pcibios_get_irq_routing_table(); + rtable = pirq_table; + } #endif if (pirq_table) { pirq_peer_trick(); @@ -1145,8 +1149,10 @@ void __init pcibios_irq_init(void) * If we're using the I/O APIC, avoid using the PCI IRQ * routing table */ - if (io_apic_assign_pci_irqs) + if (io_apic_assign_pci_irqs) { + kfree(rtable); pirq_table = NULL; + } } x86_init.pci.fixup_irqs(); -- cgit From 5c14068f87d04adc73ba3f41c2a303d3c3d1fa12 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Apr 2019 16:39:02 -0500 Subject: x86/speculation/mds: Add 'mitigations=' support for MDS Add MDS to the new 'mitigations=' cmdline option. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/bugs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3c5c3c3ba734..667c273a66d7 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -233,7 +233,7 @@ static const char * const mds_strings[] = { static void __init mds_select_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_MDS)) { + if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) { mds_mitigation = MDS_MITIGATION_OFF; return; } @@ -244,7 +244,8 @@ static void __init mds_select_mitigation(void) static_branch_enable(&mds_user_clear); - if (mds_nosmt && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) + if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && + (mds_nosmt || cpu_mitigations_auto_nosmt())) cpu_smt_disable(false); } -- cgit From 5dd50aaeb1853ee0953b60fa6d1143d95429ae7b Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 5 Nov 2018 17:40:31 +0000 Subject: Make anon_inodes unconditional Make the anon_inodes facility unconditional so that it can be used by core VFS code and pidfd code. Signed-off-by: David Howells Signed-off-by: Al Viro [christian@brauner.io: adapt commit message to mention pidfds] Signed-off-by: Christian Brauner --- arch/x86/Kconfig | 1 - arch/x86/kvm/Kconfig | 1 - 2 files changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5ad92419be19..7a70fb58b2d0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -44,7 +44,6 @@ config X86 # select ACPI_LEGACY_TABLES_LOOKUP if ACPI select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI - select ANON_INODES select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_DATA select ARCH_CLOCKSOURCE_INIT diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 72fa955f4a15..fc042419e670 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -27,7 +27,6 @@ config KVM depends on X86_LOCAL_APIC select PREEMPT_NOTIFIERS select MMU_NOTIFIER - select ANON_INODES select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD select IRQ_BYPASS_MANAGER -- cgit From 5ce5d8a5a4ae64b281bea7ae028c960f12acae21 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 Apr 2019 22:51:49 +0200 Subject: asm-generic: generalize asm/sockios.h ia64, parisc and sparc just use a copy of the generic version of asm/sockios.h, and x86 is a redirect to the same file, so we can just let the header file be generated. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- arch/x86/include/uapi/asm/sockios.h | 1 - 1 file changed, 1 deletion(-) delete mode 100644 arch/x86/include/uapi/asm/sockios.h (limited to 'arch/x86') diff --git a/arch/x86/include/uapi/asm/sockios.h b/arch/x86/include/uapi/asm/sockios.h deleted file mode 100644 index def6d4746ee7..000000000000 --- a/arch/x86/include/uapi/asm/sockios.h +++ /dev/null @@ -1 +0,0 @@ -#include -- cgit From 877b5691f27a1aec0d9b53095a323e45c30069e2 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 14 Apr 2019 17:37:09 -0700 Subject: crypto: shash - remove shash_desc::flags The flags field in 'struct shash_desc' never actually does anything. The only ostensibly supported flag is CRYPTO_TFM_REQ_MAY_SLEEP. However, no shash algorithm ever sleeps, making this flag a no-op. With this being the case, inevitably some users who can't sleep wrongly pass MAY_SLEEP. These would all need to be fixed if any shash algorithm actually started sleeping. For example, the shash_ahash_*() functions, which wrap a shash algorithm with the ahash API, pass through MAY_SLEEP from the ahash API to the shash API. However, the shash functions are called under kmap_atomic(), so actually they're assumed to never sleep. Even if it turns out that some users do need preemption points while hashing large buffers, we could easily provide a helper function crypto_shash_update_large() which divides the data into smaller chunks and calls crypto_shash_update() and cond_resched() for each chunk. It's not necessary to have a flag in 'struct shash_desc', nor is it necessary to make individual shash algorithms aware of this at all. Therefore, remove shash_desc::flags, and document that the crypto_shash_*() functions can be called from any context. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/ghash-clmulni-intel_glue.c | 2 -- arch/x86/power/hibernate.c | 1 - 2 files changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 4099a0ae17dd..e3f3e6fd9d65 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -172,7 +172,6 @@ static int ghash_async_init(struct ahash_request *req) struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); desc->tfm = child; - desc->flags = req->base.flags; return crypto_shash_init(desc); } @@ -252,7 +251,6 @@ static int ghash_async_digest(struct ahash_request *req) struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); desc->tfm = child; - desc->flags = req->base.flags; return shash_ahash_digest(req, desc); } } diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c index bcddf09b5aa3..4845b8c7be7f 100644 --- a/arch/x86/power/hibernate.c +++ b/arch/x86/power/hibernate.c @@ -90,7 +90,6 @@ static int get_e820_md5(struct e820_table *table, void *buf) } desc->tfm = tfm; - desc->flags = 0; size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry) * table->nr_entries; -- cgit From 711aef1bbf88212a21f7103e88f397b47a528805 Mon Sep 17 00:00:00 2001 From: Wang YanQing Date: Sat, 27 Apr 2019 16:28:26 +0800 Subject: bpf, x32: Fix bug for BPF_JMP | {BPF_JSGT, BPF_JSLE, BPF_JSLT, BPF_JSGE} The current method to compare 64-bit numbers for conditional jump is: 1) Compare the high 32-bit first. 2) If the high 32-bit isn't the same, then goto step 4. 3) Compare the low 32-bit. 4) Check the desired condition. This method is right for unsigned comparison, but it is buggy for signed comparison, because it does signed comparison for low 32-bit too. There is only one sign bit in 64-bit number, that is the MSB in the 64-bit number, it is wrong to treat low 32-bit as signed number and do the signed comparison for it. This patch fixes the bug and adds a testcase in selftests/bpf for such bug. Signed-off-by: Wang YanQing Signed-off-by: Daniel Borkmann --- arch/x86/net/bpf_jit_comp32.c | 217 ++++++++++++++++++++++++++++++++---------- 1 file changed, 166 insertions(+), 51 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 0d9cdffce6ac..8097b88d744f 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -117,6 +117,8 @@ static bool is_simm32(s64 value) #define IA32_JLE 0x7E #define IA32_JG 0x7F +#define COND_JMP_OPCODE_INVALID (0xFF) + /* * Map eBPF registers to IA32 32bit registers or stack scratch space. * @@ -1613,6 +1615,75 @@ static inline void emit_push_r64(const u8 src[], u8 **pprog) *pprog = prog; } +static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo) +{ + u8 jmp_cond; + + /* Convert BPF opcode to x86 */ + switch (op) { + case BPF_JEQ: + jmp_cond = IA32_JE; + break; + case BPF_JSET: + case BPF_JNE: + jmp_cond = IA32_JNE; + break; + case BPF_JGT: + /* GT is unsigned '>', JA in x86 */ + jmp_cond = IA32_JA; + break; + case BPF_JLT: + /* LT is unsigned '<', JB in x86 */ + jmp_cond = IA32_JB; + break; + case BPF_JGE: + /* GE is unsigned '>=', JAE in x86 */ + jmp_cond = IA32_JAE; + break; + case BPF_JLE: + /* LE is unsigned '<=', JBE in x86 */ + jmp_cond = IA32_JBE; + break; + case BPF_JSGT: + if (!is_cmp_lo) + /* Signed '>', GT in x86 */ + jmp_cond = IA32_JG; + else + /* GT is unsigned '>', JA in x86 */ + jmp_cond = IA32_JA; + break; + case BPF_JSLT: + if (!is_cmp_lo) + /* Signed '<', LT in x86 */ + jmp_cond = IA32_JL; + else + /* LT is unsigned '<', JB in x86 */ + jmp_cond = IA32_JB; + break; + case BPF_JSGE: + if (!is_cmp_lo) + /* Signed '>=', GE in x86 */ + jmp_cond = IA32_JGE; + else + /* GE is unsigned '>=', JAE in x86 */ + jmp_cond = IA32_JAE; + break; + case BPF_JSLE: + if (!is_cmp_lo) + /* Signed '<=', LE in x86 */ + jmp_cond = IA32_JLE; + else + /* LE is unsigned '<=', JBE in x86 */ + jmp_cond = IA32_JBE; + break; + default: /* to silence GCC warning */ + jmp_cond = COND_JMP_OPCODE_INVALID; + break; + } + + return jmp_cond; +} + static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { @@ -2069,10 +2140,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_JMP | BPF_JLT | BPF_X: case BPF_JMP | BPF_JGE | BPF_X: case BPF_JMP | BPF_JLE | BPF_X: - case BPF_JMP | BPF_JSGT | BPF_X: - case BPF_JMP | BPF_JSLE | BPF_X: - case BPF_JMP | BPF_JSLT | BPF_X: - case BPF_JMP | BPF_JSGE | BPF_X: case BPF_JMP32 | BPF_JEQ | BPF_X: case BPF_JMP32 | BPF_JNE | BPF_X: case BPF_JMP32 | BPF_JGT | BPF_X: @@ -2118,6 +2185,40 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); goto emit_cond_jmp; } + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: { + u8 dreg_lo = dstk ? IA32_EAX : dst_lo; + u8 dreg_hi = dstk ? IA32_EDX : dst_hi; + u8 sreg_lo = sstk ? IA32_ECX : src_lo; + u8 sreg_hi = sstk ? IA32_EBX : src_hi; + + if (dstk) { + EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), + STACK_VAR(dst_lo)); + EMIT3(0x8B, + add_2reg(0x40, IA32_EBP, + IA32_EDX), + STACK_VAR(dst_hi)); + } + + if (sstk) { + EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), + STACK_VAR(src_lo)); + EMIT3(0x8B, + add_2reg(0x40, IA32_EBP, + IA32_EBX), + STACK_VAR(src_hi)); + } + + /* cmp dreg_hi,sreg_hi */ + EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); + EMIT2(IA32_JNE, 10); + /* cmp dreg_lo,sreg_lo */ + EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); + goto emit_cond_jmp_signed; + } case BPF_JMP | BPF_JSET | BPF_X: case BPF_JMP32 | BPF_JSET | BPF_X: { bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP; @@ -2194,10 +2295,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_JMP | BPF_JLT | BPF_K: case BPF_JMP | BPF_JGE | BPF_K: case BPF_JMP | BPF_JLE | BPF_K: - case BPF_JMP | BPF_JSGT | BPF_K: - case BPF_JMP | BPF_JSLE | BPF_K: - case BPF_JMP | BPF_JSLT | BPF_K: - case BPF_JMP | BPF_JSGE | BPF_K: case BPF_JMP32 | BPF_JEQ | BPF_K: case BPF_JMP32 | BPF_JNE | BPF_K: case BPF_JMP32 | BPF_JGT | BPF_K: @@ -2238,50 +2335,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, /* cmp dreg_lo,sreg_lo */ EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); -emit_cond_jmp: /* Convert BPF opcode to x86 */ - switch (BPF_OP(code)) { - case BPF_JEQ: - jmp_cond = IA32_JE; - break; - case BPF_JSET: - case BPF_JNE: - jmp_cond = IA32_JNE; - break; - case BPF_JGT: - /* GT is unsigned '>', JA in x86 */ - jmp_cond = IA32_JA; - break; - case BPF_JLT: - /* LT is unsigned '<', JB in x86 */ - jmp_cond = IA32_JB; - break; - case BPF_JGE: - /* GE is unsigned '>=', JAE in x86 */ - jmp_cond = IA32_JAE; - break; - case BPF_JLE: - /* LE is unsigned '<=', JBE in x86 */ - jmp_cond = IA32_JBE; - break; - case BPF_JSGT: - /* Signed '>', GT in x86 */ - jmp_cond = IA32_JG; - break; - case BPF_JSLT: - /* Signed '<', LT in x86 */ - jmp_cond = IA32_JL; - break; - case BPF_JSGE: - /* Signed '>=', GE in x86 */ - jmp_cond = IA32_JGE; - break; - case BPF_JSLE: - /* Signed '<=', LE in x86 */ - jmp_cond = IA32_JLE; - break; - default: /* to silence GCC warning */ +emit_cond_jmp: jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false); + if (jmp_cond == COND_JMP_OPCODE_INVALID) return -EFAULT; - } jmp_offset = addrs[i + insn->off] - addrs[i]; if (is_imm8(jmp_offset)) { EMIT2(jmp_cond, jmp_offset); @@ -2291,7 +2347,66 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */ pr_err("cond_jmp gen bug %llx\n", jmp_offset); return -EFAULT; } + break; + } + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: { + u8 dreg_lo = dstk ? IA32_EAX : dst_lo; + u8 dreg_hi = dstk ? IA32_EDX : dst_hi; + u8 sreg_lo = IA32_ECX; + u8 sreg_hi = IA32_EBX; + u32 hi; + + if (dstk) { + EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), + STACK_VAR(dst_lo)); + EMIT3(0x8B, + add_2reg(0x40, IA32_EBP, + IA32_EDX), + STACK_VAR(dst_hi)); + } + + /* mov ecx,imm32 */ + EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); + hi = imm32 & (1 << 31) ? (u32)~0 : 0; + /* mov ebx,imm32 */ + EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi); + /* cmp dreg_hi,sreg_hi */ + EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); + EMIT2(IA32_JNE, 10); + /* cmp dreg_lo,sreg_lo */ + EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); + + /* + * For simplicity of branch offset computation, + * let's use fixed jump coding here. + */ +emit_cond_jmp_signed: /* Check the condition for low 32-bit comparison */ + jmp_cond = get_cond_jmp_opcode(BPF_OP(code), true); + if (jmp_cond == COND_JMP_OPCODE_INVALID) + return -EFAULT; + jmp_offset = addrs[i + insn->off] - addrs[i] + 8; + if (is_simm32(jmp_offset)) { + EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset); + } else { + pr_err("cond_jmp gen bug %llx\n", jmp_offset); + return -EFAULT; + } + EMIT2(0xEB, 6); + /* Check the condition for high 32-bit comparison */ + jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false); + if (jmp_cond == COND_JMP_OPCODE_INVALID) + return -EFAULT; + jmp_offset = addrs[i + insn->off] - addrs[i]; + if (is_simm32(jmp_offset)) { + EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset); + } else { + pr_err("cond_jmp gen bug %llx\n", jmp_offset); + return -EFAULT; + } break; } case BPF_JMP | BPF_JA: -- cgit From b9aa0b35d878dff9ed19f94101fe353a4de00cc4 Mon Sep 17 00:00:00 2001 From: Wang YanQing Date: Sun, 28 Apr 2019 10:33:02 +0800 Subject: bpf, x32: Fix bug for BPF_ALU64 | BPF_NEG The current implementation has two errors: 1: The second xor instruction will clear carry flag which is necessary for following sbb instruction. 2: The select coding for sbb instruction is wrong, the coding is "sbb dreg_hi,ecx", but what we need is "sbb ecx,dreg_hi". This patch rewrites the implementation and fixes the errors. This patch fixes below errors reported by bpf/test_verifier in x32 platform when the jit is enabled: " 0: (b4) w1 = 4 1: (b4) w2 = 4 2: (1f) r2 -= r1 3: (4f) r2 |= r1 4: (87) r2 = -r2 5: (c7) r2 s>>= 63 6: (5f) r1 &= r2 7: (bf) r0 = r1 8: (95) exit processed 9 insns (limit 131072), stack depth 0 0: (b4) w1 = 4 1: (b4) w2 = 4 2: (1f) r2 -= r1 3: (4f) r2 |= r1 4: (87) r2 = -r2 5: (c7) r2 s>>= 63 6: (5f) r1 &= r2 7: (bf) r0 = r1 8: (95) exit processed 9 insns (limit 131072), stack depth 0 ...... Summary: 1189 PASSED, 125 SKIPPED, 15 FAILED " Signed-off-by: Wang YanQing Signed-off-by: Daniel Borkmann --- arch/x86/net/bpf_jit_comp32.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 8097b88d744f..b29e82f190c7 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -700,19 +700,12 @@ static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog) STACK_VAR(dst_hi)); } - /* xor ecx,ecx */ - EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX)); - /* sub dreg_lo,ecx */ - EMIT2(0x2B, add_2reg(0xC0, dreg_lo, IA32_ECX)); - /* mov dreg_lo,ecx */ - EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX)); - - /* xor ecx,ecx */ - EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX)); - /* sbb dreg_hi,ecx */ - EMIT2(0x19, add_2reg(0xC0, dreg_hi, IA32_ECX)); - /* mov dreg_hi,ecx */ - EMIT2(0x89, add_2reg(0xC0, dreg_hi, IA32_ECX)); + /* neg dreg_lo */ + EMIT2(0xF7, add_1reg(0xD8, dreg_lo)); + /* adc dreg_hi,0x0 */ + EMIT3(0x83, add_1reg(0xD0, dreg_hi), 0x00); + /* neg dreg_hi */ + EMIT2(0xF7, add_1reg(0xD8, dreg_hi)); if (dstk) { /* mov dword ptr [ebp+off],dreg_lo */ -- cgit From d9c9ce34ed5c892323cbf5b4f9a4c498e036316a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 2 May 2019 19:11:39 +0200 Subject: x86/fpu: Fault-in user stack if copy_fpstate_to_sigframe() fails In the compacted form, XSAVES may save only the XMM+SSE state but skip FP (x87 state). This is denoted by header->xfeatures = 6. The fastpath (copy_fpregs_to_sigframe()) does that but _also_ initialises the FP state (cwd to 0x37f, mxcsr as we do, remaining fields to 0). The slowpath (copy_xstate_to_user()) leaves most of the FP state untouched. Only mxcsr and mxcsr_flags are set due to xfeatures_mxcsr_quirk(). Now that XFEATURE_MASK_FP is set unconditionally, see 04944b793e18 ("x86: xsave: set FP, SSE bits in the xsave header in the user sigcontext"), on return from the signal, random garbage is loaded as the FP state. Instead of utilizing copy_xstate_to_user(), fault-in the user memory and retry the fast path. Ideally, the fast path succeeds on the second attempt but may be retried again if the memory is swapped out due to memory pressure. If the user memory can not be faulted-in then get_user_pages() returns an error so we don't loop forever. Fault in memory via get_user_pages_unlocked() so copy_fpregs_to_sigframe() succeeds without a fault. Fixes: 69277c98f5eef ("x86/fpu: Always store the registers in copy_fpstate_to_sigframe()") Reported-by: Kurt Kanzenbach Suggested-by: Dave Hansen Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: "linux-mm@kvack.org" Cc: Qian Cai Cc: Rik van Riel Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190502171139.mqtegctsg35cir2e@linutronix.de --- arch/x86/kernel/fpu/signal.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 7026f1c4e5e3..5a8d118bc423 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -157,11 +157,9 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) */ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) { - struct fpu *fpu = ¤t->thread.fpu; - struct xregs_state *xsave = &fpu->state.xsave; struct task_struct *tsk = current; int ia32_fxstate = (buf != buf_fx); - int ret = -EFAULT; + int ret; ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) || IS_ENABLED(CONFIG_IA32_EMULATION)); @@ -174,11 +172,12 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) sizeof(struct user_i387_ia32_struct), NULL, (struct _fpstate_32 __user *) buf) ? -1 : 1; +retry: /* * Load the FPU registers if they are not valid for the current task. * With a valid FPU state we can attempt to save the state directly to - * userland's stack frame which will likely succeed. If it does not, do - * the slowpath. + * userland's stack frame which will likely succeed. If it does not, + * resolve the fault in the user memory and try again. */ fpregs_lock(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) @@ -187,20 +186,20 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) pagefault_disable(); ret = copy_fpregs_to_sigframe(buf_fx); pagefault_enable(); - if (ret && !test_thread_flag(TIF_NEED_FPU_LOAD)) - copy_fpregs_to_fpstate(fpu); - set_thread_flag(TIF_NEED_FPU_LOAD); fpregs_unlock(); if (ret) { - if (using_compacted_format()) { - if (copy_xstate_to_user(buf_fx, xsave, 0, size)) - return -1; - } else { - fpstate_sanitize_xstate(fpu); - if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size)) - return -1; - } + int aligned_size; + int nr_pages; + + aligned_size = offset_in_page(buf_fx) + fpu_user_xstate_size; + nr_pages = DIV_ROUND_UP(aligned_size, PAGE_SIZE); + + ret = get_user_pages_unlocked((unsigned long)buf_fx, nr_pages, + NULL, FOLL_WRITE); + if (ret == nr_pages) + goto retry; + return -EFAULT; } /* Save the fsave header for the 32-bit frames. */ -- cgit From c5bf68fe0c86a5835bd2e6aead1c49976360753f Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Tue, 26 Mar 2019 23:51:19 +0300 Subject: *: convert stream-like files from nonseekable_open -> stream_open Using scripts/coccinelle/api/stream_open.cocci added in 10dce8af3422 ("fs: stream_open - opener for stream-like files so that read and write can run simultaneously without deadlock"), search and convert to stream_open all in-kernel nonseekable_open users for which read and write actually do not depend on ppos and where there is no other methods in file_operations which assume @offset access. I've verified each generated change manually - that it is correct to convert - and each other nonseekable_open instance left - that it is either not correct to convert there, or that it is not converted due to current stream_open.cocci limitations. The script also does not convert files that should be valid to convert, but that currently have .llseek = noop_llseek or generic_file_llseek for unknown reason despite file being opened with nonseekable_open (e.g. drivers/input/mousedev.c) Among cases converted 14 were potentially vulnerable to read vs write deadlock (see details in 10dce8af3422): drivers/char/pcmcia/cm4000_cs.c:1685:7-23: ERROR: cm4000_fops: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix. drivers/gnss/core.c:45:1-17: ERROR: gnss_fops: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix. drivers/hid/uhid.c:635:1-17: ERROR: uhid_fops: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix. drivers/infiniband/core/user_mad.c:988:1-17: ERROR: umad_fops: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix. drivers/input/evdev.c:527:1-17: ERROR: evdev_fops: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix. drivers/input/misc/uinput.c:401:1-17: ERROR: uinput_fops: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix. drivers/isdn/capi/capi.c:963:8-24: ERROR: capi_fops: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix. drivers/leds/uleds.c:77:1-17: ERROR: uleds_fops: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix. drivers/media/rc/lirc_dev.c:198:1-17: ERROR: lirc_fops: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix. drivers/s390/char/fs3270.c:488:1-17: ERROR: fs3270_fops: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix. drivers/usb/misc/ldusb.c:310:1-17: ERROR: ld_usb_fops: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix. drivers/xen/evtchn.c:667:8-24: ERROR: evtchn_fops: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix. net/batman-adv/icmp_socket.c:80:1-17: ERROR: batadv_fops: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix. net/rfkill/core.c:1146:8-24: ERROR: rfkill_fops: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix. and the rest were just safe to convert to stream_open because their read and write do not use ppos at all and corresponding file_operations do not have methods that assume @offset file access(*): arch/powerpc/platforms/52xx/mpc52xx_gpt.c:631:8-24: WARNING: mpc52xx_wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. arch/powerpc/platforms/cell/spufs/file.c:591:8-24: WARNING: spufs_ibox_fops: .read() has stream semantic; safe to change nonseekable_open -> stream_open. arch/powerpc/platforms/cell/spufs/file.c:591:8-24: WARNING: spufs_ibox_stat_fops: .read() has stream semantic; safe to change nonseekable_open -> stream_open. arch/powerpc/platforms/cell/spufs/file.c:591:8-24: WARNING: spufs_mbox_fops: .read() has stream semantic; safe to change nonseekable_open -> stream_open. arch/powerpc/platforms/cell/spufs/file.c:591:8-24: WARNING: spufs_mbox_stat_fops: .read() has stream semantic; safe to change nonseekable_open -> stream_open. arch/powerpc/platforms/cell/spufs/file.c:591:8-24: WARNING: spufs_wbox_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. arch/powerpc/platforms/cell/spufs/file.c:591:8-24: WARNING: spufs_wbox_stat_fops: .read() has stream semantic; safe to change nonseekable_open -> stream_open. arch/um/drivers/harddog_kern.c:88:8-24: WARNING: harddog_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. arch/x86/kernel/cpu/microcode/core.c:430:33-49: WARNING: microcode_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/char/ds1620.c:215:8-24: WARNING: ds1620_fops: .read() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/char/dtlk.c:301:1-17: WARNING: dtlk_fops: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open. drivers/char/ipmi/ipmi_watchdog.c:840:9-25: WARNING: ipmi_wdog_fops: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open. drivers/char/pcmcia/scr24x_cs.c:95:8-24: WARNING: scr24x_fops: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open. drivers/char/tb0219.c:246:9-25: WARNING: tb0219_fops: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open. drivers/firewire/nosy.c:306:8-24: WARNING: nosy_ops: .read() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/hwmon/fschmd.c:840:8-24: WARNING: watchdog_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/hwmon/w83793.c:1344:8-24: WARNING: watchdog_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/infiniband/core/ucma.c:1747:8-24: WARNING: ucma_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/infiniband/core/ucm.c:1178:8-24: WARNING: ucm_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/infiniband/core/uverbs_main.c:1086:8-24: WARNING: uverbs_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/input/joydev.c:282:1-17: WARNING: joydev_fops: .read() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/pci/switch/switchtec.c:393:1-17: WARNING: switchtec_fops: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open. drivers/platform/chrome/cros_ec_debugfs.c:135:8-24: WARNING: cros_ec_console_log_fops: .read() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/rtc/rtc-ds1374.c:470:9-25: WARNING: ds1374_wdt_fops: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open. drivers/rtc/rtc-m41t80.c:805:9-25: WARNING: wdt_fops: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open. drivers/s390/char/tape_char.c:293:2-18: WARNING: tape_fops: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open. drivers/s390/char/zcore.c:194:8-24: WARNING: zcore_reipl_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/s390/crypto/zcrypt_api.c:528:8-24: WARNING: zcrypt_fops: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open. drivers/spi/spidev.c:594:1-17: WARNING: spidev_fops: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open. drivers/staging/pi433/pi433_if.c:974:1-17: WARNING: pi433_fops: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/acquirewdt.c:203:8-24: WARNING: acq_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/advantechwdt.c:202:8-24: WARNING: advwdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/alim1535_wdt.c:252:8-24: WARNING: ali_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/alim7101_wdt.c:217:8-24: WARNING: wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/ar7_wdt.c:166:8-24: WARNING: ar7_wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/at91rm9200_wdt.c:113:8-24: WARNING: at91wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/ath79_wdt.c:135:8-24: WARNING: ath79_wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/bcm63xx_wdt.c:119:8-24: WARNING: bcm63xx_wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/cpu5wdt.c:143:8-24: WARNING: cpu5wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/cpwd.c:397:8-24: WARNING: cpwd_fops: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/eurotechwdt.c:319:8-24: WARNING: eurwdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/f71808e_wdt.c:528:8-24: WARNING: watchdog_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/gef_wdt.c:232:8-24: WARNING: gef_wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/geodewdt.c:95:8-24: WARNING: geodewdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/ib700wdt.c:241:8-24: WARNING: ibwdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/ibmasr.c:326:8-24: WARNING: asr_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/indydog.c:80:8-24: WARNING: indydog_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/intel_scu_watchdog.c:307:8-24: WARNING: intel_scu_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/iop_wdt.c:104:8-24: WARNING: iop_wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/it8712f_wdt.c:330:8-24: WARNING: it8712f_wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/ixp4xx_wdt.c:68:8-24: WARNING: ixp4xx_wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/ks8695_wdt.c:145:8-24: WARNING: ks8695wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/m54xx_wdt.c:88:8-24: WARNING: m54xx_wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/machzwd.c:336:8-24: WARNING: zf_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/mixcomwd.c:153:8-24: WARNING: mixcomwd_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/mtx-1_wdt.c:121:8-24: WARNING: mtx1_wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/mv64x60_wdt.c:136:8-24: WARNING: mv64x60_wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/nuc900_wdt.c:134:8-24: WARNING: nuc900wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/nv_tco.c:164:8-24: WARNING: nv_tco_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/pc87413_wdt.c:289:8-24: WARNING: pc87413_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/pcwd.c:698:8-24: WARNING: pcwd_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/pcwd.c:737:8-24: WARNING: pcwd_temp_fops: .read() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/pcwd_pci.c:581:8-24: WARNING: pcipcwd_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/pcwd_pci.c:623:8-24: WARNING: pcipcwd_temp_fops: .read() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/pcwd_usb.c:488:8-24: WARNING: usb_pcwd_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/pcwd_usb.c:527:8-24: WARNING: usb_pcwd_temperature_fops: .read() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/pika_wdt.c:121:8-24: WARNING: pikawdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/pnx833x_wdt.c:119:8-24: WARNING: pnx833x_wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/rc32434_wdt.c:153:8-24: WARNING: rc32434_wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/rdc321x_wdt.c:145:8-24: WARNING: rdc321x_wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/riowd.c:79:1-17: WARNING: riowd_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/sa1100_wdt.c:62:8-24: WARNING: sa1100dog_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/sbc60xxwdt.c:211:8-24: WARNING: wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/sbc7240_wdt.c:139:8-24: WARNING: wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/sbc8360.c:274:8-24: WARNING: sbc8360_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/sbc_epx_c3.c:81:8-24: WARNING: epx_c3_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/sbc_fitpc2_wdt.c:78:8-24: WARNING: fitpc2_wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/sb_wdog.c:108:1-17: WARNING: sbwdog_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/sc1200wdt.c:181:8-24: WARNING: sc1200wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/sc520_wdt.c:261:8-24: WARNING: wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/sch311x_wdt.c:319:8-24: WARNING: sch311x_wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/scx200_wdt.c:105:8-24: WARNING: scx200_wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/smsc37b787_wdt.c:369:8-24: WARNING: wb_smsc_wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/w83877f_wdt.c:227:8-24: WARNING: wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/w83977f_wdt.c:301:8-24: WARNING: wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/wafer5823wdt.c:200:8-24: WARNING: wafwdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/watchdog_dev.c:828:8-24: WARNING: watchdog_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/wdrtas.c:379:8-24: WARNING: wdrtas_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/wdrtas.c:445:8-24: WARNING: wdrtas_temp_fops: .read() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/wdt285.c:104:1-17: WARNING: watchdog_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/wdt977.c:276:8-24: WARNING: wdt977_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/wdt.c:424:8-24: WARNING: wdt_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/wdt.c:484:8-24: WARNING: wdt_temp_fops: .read() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/wdt_pci.c:464:8-24: WARNING: wdtpci_fops: .write() has stream semantic; safe to change nonseekable_open -> stream_open. drivers/watchdog/wdt_pci.c:527:8-24: WARNING: wdtpci_temp_fops: .read() has stream semantic; safe to change nonseekable_open -> stream_open. net/batman-adv/log.c:105:1-17: WARNING: batadv_log_fops: .read() has stream semantic; safe to change nonseekable_open -> stream_open. sound/core/control.c:57:7-23: WARNING: snd_ctl_f_ops: .read() has stream semantic; safe to change nonseekable_open -> stream_open. sound/core/rawmidi.c:385:7-23: WARNING: snd_rawmidi_f_ops: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open. sound/core/seq/seq_clientmgr.c:310:7-23: WARNING: snd_seq_f_ops: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open. sound/core/timer.c:1428:7-23: WARNING: snd_timer_f_ops: .read() has stream semantic; safe to change nonseekable_open -> stream_open. One can also recheck/review the patch via generating it with explanation comments included via $ make coccicheck MODE=patch COCCI=scripts/coccinelle/api/stream_open.cocci SPFLAGS="-D explain" (*) This second group also contains cases with read/write deadlocks that stream_open.cocci don't yet detect, but which are still valid to convert to stream_open since ppos is not used. For example drivers/pci/switch/switchtec.c calls wait_for_completion_interruptible() in its .read, but stream_open.cocci currently detects only "wait_event*" as blocking. Cc: Michael Kerrisk Cc: Yongzhi Pan Cc: Jonathan Corbet Cc: David Vrabel Cc: Juergen Gross Cc: Miklos Szeredi Cc: Tejun Heo Cc: Kirill Tkhai Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: Greg Kroah-Hartman Cc: Julia Lawall Cc: Nikolaus Rath Cc: Han-Wen Nienhuys Cc: Anatolij Gustschin Cc: Jeff Dike Cc: Richard Weinberger Cc: Anton Ivanov Cc: Borislav Petkov Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "James R. Van Zandt" Cc: Corey Minyard Cc: Harald Welte Acked-by: Lubomir Rintel [scr24x_cs] Cc: Stefan Richter Cc: Johan Hovold Cc: David Herrmann Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: Jean Delvare Acked-by: Guenter Roeck [watchdog/* hwmon/*] Cc: Rudolf Marek Cc: Dmitry Torokhov Cc: Karsten Keil Cc: Jacek Anaszewski Cc: Pavel Machek Cc: Mauro Carvalho Chehab Cc: Kurt Schwemmer Acked-by: Logan Gunthorpe [drivers/pci/switch/switchtec] Acked-by: Bjorn Helgaas [drivers/pci/switch/switchtec] Cc: Benson Leung Acked-by: Enric Balletbo i Serra [platform/chrome] Cc: Alessandro Zummo Acked-by: Alexandre Belloni [rtc/*] Cc: Mark Brown Cc: Wim Van Sebroeck Cc: Florian Fainelli Cc: bcm-kernel-feedback-list@broadcom.com Cc: Wan ZongShun Cc: Zwane Mwaikambo Cc: Marek Lindner Cc: Simon Wunderlich Cc: Antonio Quartulli Cc: "David S. Miller" Cc: Johannes Berg Cc: Jaroslav Kysela Cc: Takashi Iwai Signed-off-by: Kirill Smelkov --- arch/x86/kernel/cpu/microcode/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 5260185cbf7b..639817729ed4 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -427,7 +427,7 @@ static int do_microcode_update(const void __user *buf, size_t size) static int microcode_open(struct inode *inode, struct file *file) { - return capable(CAP_SYS_RAWIO) ? nonseekable_open(inode, file) : -EPERM; + return capable(CAP_SYS_RAWIO) ? stream_open(inode, file) : -EPERM; } static ssize_t microcode_write(struct file *file, const char __user *buf, -- cgit From 73b0140bf0fe9df90fb267c00673c4b9bf285430 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 13 May 2019 17:17:11 -0700 Subject: mm/gup: change GUP fast to use flags rather than a write 'bool' To facilitate additional options to get_user_pages_fast() change the singular write parameter to be gup_flags. This patch does not change any functionality. New functionality will follow in subsequent patches. Some of the get_user_pages_fast() call sites were unchanged because they already passed FOLL_WRITE or 0 for the write parameter. NOTE: It was suggested to change the ordering of the get_user_pages_fast() arguments to ensure that callers were converted. This breaks the current GUP call site convention of having the returned pages be the final parameter. So the suggestion was rejected. Link: http://lkml.kernel.org/r/20190328084422.29911-4-ira.weiny@intel.com Link: http://lkml.kernel.org/r/20190317183438.2057-4-ira.weiny@intel.com Signed-off-by: Ira Weiny Reviewed-by: Mike Marshall Cc: Aneesh Kumar K.V Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Dan Williams Cc: "David S. Miller" Cc: Heiko Carstens Cc: Ingo Molnar Cc: James Hogan Cc: Jason Gunthorpe Cc: John Hubbard Cc: "Kirill A. Shutemov" Cc: Martin Schwidefsky Cc: Michal Hocko Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Rich Felker Cc: Thomas Gleixner Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kvm/paging_tmpl.h | 2 +- arch/x86/kvm/svm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6bdca39829bc..08715034e315 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -140,7 +140,7 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pt_element_t *table; struct page *page; - npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page); + npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page); /* Check if the user is doing something meaningless. */ if (unlikely(npages != 1)) return -EFAULT; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 406b558abfef..6b92eaf4a3b1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1805,7 +1805,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, return NULL; /* Pin the user virtual address. */ - npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages); + npinned = get_user_pages_fast(uaddr, npages, FOLL_WRITE, pages); if (npinned != npages) { pr_err("SEV: Failure locking %lu pages.\n", npages); goto err; -- cgit From 8df995f6bde01de96ce93373785f41c3bd13ad1c Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 13 May 2019 17:19:00 -0700 Subject: mm: simplify MEMORY_ISOLATION && COMPACTION || CMA into CONTIG_ALLOC This condition allows to define alloc_contig_range, so simplify it into a more accurate naming. Link: http://lkml.kernel.org/r/20190327063626.18421-4-alex@ghiti.fr Signed-off-by: Alexandre Ghiti Suggested-by: Vlastimil Babka Acked-by: Vlastimil Babka Cc: Andy Lutomirsky Cc: Aneesh Kumar K.V Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. Miller Cc: Heiko Carstens Cc: "H . Peter Anvin" Cc: Ingo Molnar Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Mike Kravetz Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rich Felker Cc: Thomas Gleixner Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 2 +- arch/x86/mm/hugetlbpage.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e7212731cffb..526d95abfe5e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -22,7 +22,7 @@ config X86_64 def_bool y depends on 64BIT # Options that are inherently 64-bit kernel only: - select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA + select ARCH_HAS_GIGANTIC_PAGE if CONTIG_ALLOC select ARCH_SUPPORTS_INT128 select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_SOFT_DIRTY diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 92e4c4b85bba..fab095362c50 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -203,7 +203,7 @@ static __init int setup_hugepagesz(char *opt) } __setup("hugepagesz=", setup_hugepagesz); -#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA) +#ifdef CONFIG_CONTIG_ALLOC static __init int gigantic_pages_init(void) { /* With compaction or CMA we can allocate gigantic pages at runtime */ -- cgit From 4eb0716e868eed963967adb0b1b11d9bd8ca1d01 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 13 May 2019 17:19:04 -0700 Subject: hugetlb: allow to free gigantic pages regardless of the configuration On systems without CONTIG_ALLOC activated but that support gigantic pages, boottime reserved gigantic pages can not be freed at all. This patch simply enables the possibility to hand back those pages to memory allocator. Link: http://lkml.kernel.org/r/20190327063626.18421-5-alex@ghiti.fr Signed-off-by: Alexandre Ghiti Acked-by: David S. Miller [sparc] Reviewed-by: Mike Kravetz Cc: Andy Lutomirsky Cc: Aneesh Kumar K.V Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Dave Hansen Cc: Heiko Carstens Cc: "H . Peter Anvin" Cc: Ingo Molnar Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rich Felker Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/hugetlb.h | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 526d95abfe5e..f21bc56e5d7b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -22,7 +22,7 @@ config X86_64 def_bool y depends on 64BIT # Options that are inherently 64-bit kernel only: - select ARCH_HAS_GIGANTIC_PAGE if CONTIG_ALLOC + select ARCH_HAS_GIGANTIC_PAGE select ARCH_SUPPORTS_INT128 select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_SOFT_DIRTY diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index 7469d321f072..f65cfb48cfdd 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -17,8 +17,4 @@ static inline void arch_clear_hugepage_flags(struct page *page) { } -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -static inline bool gigantic_page_supported(void) { return true; } -#endif - #endif /* _ASM_X86_HUGETLB_H */ -- cgit From 940519f0c8b757fdcbc5d14c93cdaada20ded14c Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 13 May 2019 17:21:26 -0700 Subject: mm, memory_hotplug: provide a more generic restrictions for memory hotplug arch_add_memory, __add_pages take a want_memblock which controls whether the newly added memory should get the sysfs memblock user API (e.g. ZONE_DEVICE users do not want/need this interface). Some callers even want to control where do we allocate the memmap from by configuring altmap. Add a more generic hotplug context for arch_add_memory and __add_pages. struct mhp_restrictions contains flags which contains additional features to be enabled by the memory hotplug (MHP_MEMBLOCK_API currently) and altmap for alternative memmap allocator. This patch shouldn't introduce any functional change. [akpm@linux-foundation.org: build fix] Link: http://lkml.kernel.org/r/20190408082633.2864-3-osalvador@suse.de Signed-off-by: Michal Hocko Signed-off-by: Oscar Salvador Cc: Dan Williams Cc: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init_32.c | 6 +++--- arch/x86/mm/init_64.c | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 85c94f9a87f8..755dbed85531 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -850,13 +850,13 @@ void __init mem_init(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, restrictions); } #ifdef CONFIG_MEMORY_HOTREMOVE diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bccff68e3267..db42c11b48fb 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -777,11 +777,11 @@ static void update_end_of_memory_vars(u64 start, u64 size) } int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct vmem_altmap *altmap, bool want_memblock) + struct mhp_restrictions *restrictions) { int ret; - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, restrictions); WARN_ON_ONCE(ret); /* update max_pfn, max_low_pfn and high_memory */ @@ -791,15 +791,15 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, return ret; } -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; init_memory_mapping(start, start + size); - return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + return add_pages(nid, start_pfn, nr_pages, restrictions); } #define PAGE_INUSE 0xFD -- cgit From ac5c94264580f498e484c854031d0226b3c1038f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 13 May 2019 17:21:46 -0700 Subject: mm/memory_hotplug: make __remove_pages() and arch_remove_memory() never fail All callers of arch_remove_memory() ignore errors. And we should really try to remove any errors from the memory removal path. No more errors are reported from __remove_pages(). BUG() in s390x code in case arch_remove_memory() is triggered. We may implement that properly later. WARN in case powerpc code failed to remove the section mapping, which is better than ignoring the error completely right now. Link: http://lkml.kernel.org/r/20190409100148.24703-5-david@redhat.com Signed-off-by: David Hildenbrand Cc: Tony Luck Cc: Fenghua Yu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Yoshinori Sato Cc: Rich Felker Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: "Kirill A. Shutemov" Cc: Christophe Leroy Cc: Stefan Agner Cc: Nicholas Piggin Cc: Pavel Tatashin Cc: Vasily Gorbik Cc: Arun KS Cc: Geert Uytterhoeven Cc: Masahiro Yamada Cc: Rob Herring Cc: Joonsoo Kim Cc: Wei Yang Cc: Qian Cai Cc: Mathieu Malaterre Cc: Andrew Banman Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Mike Travis Cc: Oscar Salvador Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init_32.c | 5 +++-- arch/x86/mm/init_64.c | 10 +++------- 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 755dbed85531..075e568098f2 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -860,14 +860,15 @@ int arch_add_memory(int nid, u64 start, u64 size, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) +void arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct zone *zone; zone = page_zone(pfn_to_page(start_pfn)); - return __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index db42c11b48fb..20d14254b686 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1141,24 +1141,20 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end) remove_pagetable(start, end, true, NULL); } -int __ref arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void __ref arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct page *page = pfn_to_page(start_pfn); struct zone *zone; - int ret; /* With altmap the first mapped page is offset from @start */ if (altmap) page += vmem_altmap_offset(altmap); zone = page_zone(page); - ret = __remove_pages(zone, start_pfn, nr_pages, altmap); - WARN_ON_ONCE(ret); + __remove_pages(zone, start_pfn, nr_pages, altmap); kernel_physical_mapping_remove(start, start + size); - - return ret; } #endif #endif /* CONFIG_MEMORY_HOTPLUG */ -- cgit From 350e88bad4964da6feabee02a1a70381bcdb087e Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 13 May 2019 17:22:59 -0700 Subject: mm: memblock: make keeping memblock memory opt-in rather than opt-out Most architectures do not need the memblock memory after the page allocator is initialized, but only few enable ARCH_DISCARD_MEMBLOCK in the arch Kconfig. Replacing ARCH_DISCARD_MEMBLOCK with ARCH_KEEP_MEMBLOCK and inverting the logic makes it clear which architectures actually use memblock after system initialization and skips the necessity to add ARCH_DISCARD_MEMBLOCK to the architectures that are still missing that option. Link: http://lkml.kernel.org/r/1556102150-32517-1-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Acked-by: Michael Ellerman (powerpc) Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Richard Kuo Cc: Tony Luck Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: Ley Foon Tan Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Yoshinori Sato Cc: Rich Felker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f21bc56e5d7b..818b361094ed 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -47,7 +47,6 @@ config X86 select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_DATA select ARCH_CLOCKSOURCE_INIT - select ARCH_DISCARD_MEMBLOCK select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED -- cgit From 687a3e4d8e6129f064711291f1564d95472dba3e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 14 May 2019 15:41:45 -0700 Subject: treewide: remove SPDX "WITH Linux-syscall-note" from kernel-space headers The "WITH Linux-syscall-note" should be added to headers exported to the user-space. Some kernel-space headers have "WITH Linux-syscall-note", which seems a mistake. [1] arch/x86/include/asm/hyperv-tlfs.h Commit 5a4858032217 ("x86/hyper-v: move hyperv.h out of uapi") moved this file out of uapi, but missed to update the SPDX License tag. [2] include/asm-generic/shmparam.h Commit 76ce2a80a28e ("Rename include/{uapi => }/asm-generic/shmparam.h really") moved this file out of uapi, but missed to update the SPDX License tag. [3] include/linux/qcom-geni-se.h Commit eddac5af0654 ("soc: qcom: Add GENI based QUP Wrapper driver") added this file, but I do not see a good reason why its license tag must include "WITH Linux-syscall-note". Link: http://lkml.kernel.org/r/1554196104-3522-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/hyperv-tlfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 2bdbbbcfa393..cdf44aa9a501 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: GPL-2.0 */ /* * This file contains definitions from Hyper-V Hypervisor Top-Level Functional -- cgit From 9012d011660ea5cf2a623e1de207a2bc0ca6936d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 14 May 2019 15:42:25 -0700 Subject: compiler: allow all arches to enable CONFIG_OPTIMIZE_INLINING Commit 60a3cdd06394 ("x86: add optimized inlining") introduced CONFIG_OPTIMIZE_INLINING, but it has been available only for x86. The idea is obviously arch-agnostic. This commit moves the config entry from arch/x86/Kconfig.debug to lib/Kconfig.debug so that all architectures can benefit from it. This can make a huge difference in kernel image size especially when CONFIG_OPTIMIZE_FOR_SIZE is enabled. For example, I got 3.5% smaller arm64 kernel for v5.1-rc1. dec file 18983424 arch/arm64/boot/Image.before 18321920 arch/arm64/boot/Image.after This also slightly improves the "Kernel hacking" Kconfig menu as e61aca5158a8 ("Merge branch 'kconfig-diet' from Dave Hansen') suggested; this config option would be a good fit in the "compiler option" menu. Link: http://lkml.kernel.org/r/20190423034959.13525-12-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Acked-by: Borislav Petkov Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Boris Brezillon Cc: Brian Norris Cc: Christophe Leroy Cc: David Woodhouse Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Marek Vasut Cc: Mark Rutland Cc: Mathieu Malaterre Cc: Miquel Raynal Cc: Paul Mackerras Cc: Ralf Baechle Cc: Richard Weinberger Cc: Russell King Cc: Stefan Agner Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 3 --- arch/x86/Kconfig.debug | 14 -------------- 2 files changed, 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 818b361094ed..326b2d5bab9d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -305,9 +305,6 @@ config ZONE_DMA32 config AUDIT_ARCH def_bool y if X86_64 -config ARCH_SUPPORTS_OPTIMIZED_INLINING - def_bool y - config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 15d0fbe27872..f730680dc818 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -266,20 +266,6 @@ config CPA_DEBUG ---help--- Do change_page_attr() self-tests every 30 seconds. -config OPTIMIZE_INLINING - bool "Allow gcc to uninline functions marked 'inline'" - ---help--- - This option determines if the kernel forces gcc to inline the functions - developers have marked 'inline'. Doing so takes away freedom from gcc to - do what it thinks is best, which is desirable for the gcc 3.x series of - compilers. The gcc 4.x series have a rewritten inlining algorithm and - enabling this option will generate a smaller kernel there. Hopefully - this algorithm is so good that allowing gcc 4.x and above to make the - decision will become the default in the future. Until then this option - is there to test gcc for this. - - If unsure, say N. - config DEBUG_ENTRY bool "Debug low-level entry code" depends on DEBUG_KERNEL -- cgit From 87dfb311b707cd4c4b666c9af0fa15acbe6eee99 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 14 May 2019 15:46:51 -0700 Subject: treewide: replace #include with #include Since commit dccd2304cc90 ("ARM: 7430/1: sizes.h: move from asm-generic to "), and are just wrappers of . This commit replaces all and to prepare for the removal. Link: http://lkml.kernel.org/r/1553267665-27228-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/events/intel/bts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 7cdd7b13bbda..890a3fb5706f 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include "../perf_event.h" -- cgit