diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/acpi/boot.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/alternative.c | 14 | ||||
-rw-r--r-- | arch/x86/kernel/apm_32.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 66 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/bugs.c | 96 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 25 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/intel.c | 27 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/microcode/core.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mshyperv.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/scattered.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/devicetree.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/dumpstack.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/early-quirks.c | 87 | ||||
-rw-r--r-- | arch/x86/kernel/irq.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/kvm.c | 49 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/ptrace.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/relocate_kernel_64.S | 8 | ||||
-rw-r--r-- | arch/x86/kernel/signal.c | 2 |
20 files changed, 269 insertions, 144 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ec3a286163c3..2aa92094b59d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -36,6 +36,7 @@ #include <linux/ioport.h> #include <linux/pci.h> #include <linux/efi-bgrt.h> +#include <linux/serial_core.h> #include <asm/e820/api.h> #include <asm/irqdomain.h> @@ -1625,6 +1626,8 @@ int __init acpi_boot_init(void) if (!acpi_noirq) x86_init.pci.init = pci_acpi_init; + /* Do not enable ACPI SPCR console by default */ + acpi_parse_spcr(earlycon_acpi_spcr_enable, false); return 0; } diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 30571fdaaf6f..a481763a3776 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(char *str) } __setup("noreplace-smp", setup_noreplace_smp); -#ifdef CONFIG_PARAVIRT -static int __initdata_or_module noreplace_paravirt = 0; - -static int __init setup_noreplace_paravirt(char *str) -{ - noreplace_paravirt = 1; - return 1; -} -__setup("noreplace-paravirt", setup_noreplace_paravirt); -#endif - #define DPRINTK(fmt, args...) \ do { \ if (debug_alternative) \ @@ -599,9 +588,6 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, struct paravirt_patch_site *p; char insnbuf[MAX_PATCH_LEN]; - if (noreplace_paravirt) - return; - for (p = start; p < end; p++) { unsigned int used; diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index ab1865342002..dc0ca8e29c75 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -2389,6 +2389,7 @@ static int __init apm_init(void) if (HZ != 100) idle_period = (idle_period * HZ) / 100; if (idle_threshold < 100) { + cpuidle_poll_state_init(&apm_idle_driver); if (!cpuidle_register_driver(&apm_idle_driver)) if (cpuidle_register_device(&apm_cpuidle_device)) cpuidle_unregister_driver(&apm_idle_driver); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index ea831c858195..5bddbdcbc4a3 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -556,6 +556,51 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) } } +static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) +{ + u64 msr; + + /* + * BIOS support is required for SME and SEV. + * For SME: If BIOS has enabled SME then adjust x86_phys_bits by + * the SME physical address space reduction value. + * If BIOS has not enabled SME then don't advertise the + * SME feature (set in scattered.c). + * For SEV: If BIOS has not enabled SEV then don't advertise the + * SEV feature (set in scattered.c). + * + * In all cases, since support for SME and SEV requires long mode, + * don't advertise the feature under CONFIG_X86_32. + */ + if (cpu_has(c, X86_FEATURE_SME) || cpu_has(c, X86_FEATURE_SEV)) { + /* Check if memory encryption is enabled */ + rdmsrl(MSR_K8_SYSCFG, msr); + if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + goto clear_all; + + /* + * Always adjust physical address bits. Even though this + * will be a value above 32-bits this is still done for + * CONFIG_X86_32 so that accurate values are reported. + */ + c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f; + + if (IS_ENABLED(CONFIG_X86_32)) + goto clear_all; + + rdmsrl(MSR_K7_HWCR, msr); + if (!(msr & MSR_K7_HWCR_SMMLOCK)) + goto clear_sev; + + return; + +clear_all: + clear_cpu_cap(c, X86_FEATURE_SME); +clear_sev: + clear_cpu_cap(c, X86_FEATURE_SEV); + } +} + static void early_init_amd(struct cpuinfo_x86 *c) { u32 dummy; @@ -627,26 +672,7 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (cpu_has_amd_erratum(c, amd_erratum_400)) set_cpu_bug(c, X86_BUG_AMD_E400); - /* - * BIOS support is required for SME. If BIOS has enabled SME then - * adjust x86_phys_bits by the SME physical address space reduction - * value. If BIOS has not enabled SME then don't advertise the - * feature (set in scattered.c). Also, since the SME support requires - * long mode, don't advertise the feature under CONFIG_X86_32. - */ - if (cpu_has(c, X86_FEATURE_SME)) { - u64 msr; - - /* Check if SME is enabled */ - rdmsrl(MSR_K8_SYSCFG, msr); - if (msr & MSR_K8_SYSCFG_MEM_ENCRYPT) { - c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f; - if (IS_ENABLED(CONFIG_X86_32)) - clear_cpu_cap(c, X86_FEATURE_SME); - } else { - clear_cpu_cap(c, X86_FEATURE_SME); - } - } + early_detect_mem_encrypt(c); } static void init_amd_k8(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3bfb2b23d79c..71949bf2de5a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -103,7 +103,7 @@ bool retpoline_module_ok(bool has_retpoline) if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) return true; - pr_err("System may be vunerable to spectre v2\n"); + pr_err("System may be vulnerable to spectre v2\n"); spectre_v2_bad_module = true; return false; } @@ -119,13 +119,13 @@ static inline const char *spectre_v2_module_string(void) { return ""; } static void __init spec2_print_if_insecure(const char *reason) { if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s\n", reason); + pr_info("%s selected on command line.\n", reason); } static void __init spec2_print_if_secure(const char *reason) { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s\n", reason); + pr_info("%s selected on command line.\n", reason); } static inline bool retp_compiler(void) @@ -140,42 +140,68 @@ static inline bool match_option(const char *arg, int arglen, const char *opt) return len == arglen && !strncmp(arg, opt, len); } +static const struct { + const char *option; + enum spectre_v2_mitigation_cmd cmd; + bool secure; +} mitigation_options[] = { + { "off", SPECTRE_V2_CMD_NONE, false }, + { "on", SPECTRE_V2_CMD_FORCE, true }, + { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "auto", SPECTRE_V2_CMD_AUTO, false }, +}; + static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) { char arg[20]; - int ret; - - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, - sizeof(arg)); - if (ret > 0) { - if (match_option(arg, ret, "off")) { - goto disable; - } else if (match_option(arg, ret, "on")) { - spec2_print_if_secure("force enabled on command line."); - return SPECTRE_V2_CMD_FORCE; - } else if (match_option(arg, ret, "retpoline")) { - spec2_print_if_insecure("retpoline selected on command line."); - return SPECTRE_V2_CMD_RETPOLINE; - } else if (match_option(arg, ret, "retpoline,amd")) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { - pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); - return SPECTRE_V2_CMD_AUTO; - } - spec2_print_if_insecure("AMD retpoline selected on command line."); - return SPECTRE_V2_CMD_RETPOLINE_AMD; - } else if (match_option(arg, ret, "retpoline,generic")) { - spec2_print_if_insecure("generic retpoline selected on command line."); - return SPECTRE_V2_CMD_RETPOLINE_GENERIC; - } else if (match_option(arg, ret, "auto")) { + int ret, i; + enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; + + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + return SPECTRE_V2_CMD_NONE; + else { + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, + sizeof(arg)); + if (ret < 0) return SPECTRE_V2_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { + if (!match_option(arg, ret, mitigation_options[i].option)) + continue; + cmd = mitigation_options[i].cmd; + break; } + + if (i >= ARRAY_SIZE(mitigation_options)) { + pr_err("unknown option (%s). Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + } + + if ((cmd == SPECTRE_V2_CMD_RETPOLINE || + cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || + cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && + !IS_ENABLED(CONFIG_RETPOLINE)) { + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; } - if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); return SPECTRE_V2_CMD_AUTO; -disable: - spec2_print_if_insecure("disabled on command line."); - return SPECTRE_V2_CMD_NONE; + } + + if (mitigation_options[i].secure) + spec2_print_if_secure(mitigation_options[i].option); + else + spec2_print_if_insecure(mitigation_options[i].option); + + return cmd; } /* Check for Skylake-like CPUs (for RSB handling) */ @@ -213,10 +239,10 @@ static void __init spectre_v2_select_mitigation(void) return; case SPECTRE_V2_CMD_FORCE: - /* FALLTRHU */ case SPECTRE_V2_CMD_AUTO: - goto retpoline_auto; - + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_auto; + break; case SPECTRE_V2_CMD_RETPOLINE_AMD: if (IS_ENABLED(CONFIG_RETPOLINE)) goto retpoline_amd; @@ -297,7 +323,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev, { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "Vulnerable\n"); + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); } ssize_t cpu_show_spectre_v2(struct device *dev, diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c7c996a692fd..d63f4b5706e4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -750,6 +750,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) } } +static void init_speculation_control(struct cpuinfo_x86 *c) +{ + /* + * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, + * and they also have a different bit for STIBP support. Also, + * a hypervisor might have set the individual AMD bits even on + * Intel CPUs, for finer-grained selection of what's available. + * + * We use the AMD bits in 0x8000_0008 EBX as the generic hardware + * features, which are visible in /proc/cpuinfo and used by the + * kernel. So set those accordingly from the Intel bits. + */ + if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { + set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_IBPB); + } + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) + set_cpu_cap(c, X86_FEATURE_STIBP); +} + void get_cpu_cap(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; @@ -844,6 +864,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); init_scattered_cpuid_features(c); + init_speculation_control(c); /* * Clear/Set all flags overridden by options, after probe. @@ -879,7 +900,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } -static const __initdata struct x86_cpu_id cpu_no_speculation[] = { +static const __initconst struct x86_cpu_id cpu_no_speculation[] = { { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, @@ -892,7 +913,7 @@ static const __initdata struct x86_cpu_id cpu_no_speculation[] = { {} }; -static const __initdata struct x86_cpu_id cpu_no_meltdown[] = { +static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { { X86_VENDOR_AMD }, {} }; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 6936d14d4c77..319bf989fad1 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -175,28 +175,17 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) c->microcode = intel_get_microcode_revision(); - /* - * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, - * and they also have a different bit for STIBP support. Also, - * a hypervisor might have set the individual AMD bits even on - * Intel CPUs, for finer-grained selection of what's available. - */ - if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { - set_cpu_cap(c, X86_FEATURE_IBRS); - set_cpu_cap(c, X86_FEATURE_IBPB); - } - if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) - set_cpu_cap(c, X86_FEATURE_STIBP); - /* Now if any of them are set, check the blacklist and clear the lot */ - if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || + if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || + cpu_has(c, X86_FEATURE_INTEL_STIBP) || + cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); - clear_cpu_cap(c, X86_FEATURE_IBRS); - clear_cpu_cap(c, X86_FEATURE_IBPB); - clear_cpu_cap(c, X86_FEATURE_STIBP); - clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); - clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP); + setup_clear_cpu_cap(X86_FEATURE_IBRS); + setup_clear_cpu_cap(X86_FEATURE_IBPB); + setup_clear_cpu_cap(X86_FEATURE_STIBP); + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); + setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); } /* diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index ba1f9555fbc5..3a8e88a611eb 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -14,7 +14,6 @@ #include <linux/capability.h> #include <linux/miscdevice.h> #include <linux/ratelimit.h> -#include <linux/kallsyms.h> #include <linux/rcupdate.h> #include <linux/kobject.h> #include <linux/uaccess.h> @@ -235,7 +234,7 @@ static void __print_mce(struct mce *m) m->cs, m->ip); if (m->cs == __KERNEL_CS) - print_symbol("{%s}", m->ip); + pr_cont("{%pS}", (void *)m->ip); pr_cont("\n"); } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index e4fc595cd6ea..319dd65f98a2 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -560,7 +560,7 @@ static ssize_t pf_show(struct device *dev, return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); } -static DEVICE_ATTR(reload, 0200, NULL, reload_store); +static DEVICE_ATTR_WO(reload); static DEVICE_ATTR(version, 0400, version_show, NULL); static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 85eb5fc180c8..9340f41ce8d3 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -251,6 +251,12 @@ static void __init ms_hyperv_init_platform(void) hyperv_setup_mmu_ops(); /* Setup the IDT for hypervisor callback */ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); + + /* Setup the IDT for reenlightenment notifications */ + if (ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT) + alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR, + hyperv_reenlightenment_vector); + #endif } diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 4075d2be5357..772c219b6889 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -30,6 +30,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, { X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 }, + { X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 76e07698e6d1..25de5f6ca997 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -2,7 +2,6 @@ /* * Architecture specific OF callbacks. */ -#include <linux/bootmem.h> #include <linux/export.h> #include <linux/io.h> #include <linux/interrupt.h> @@ -39,11 +38,6 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) BUG(); } -void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) -{ - return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS)); -} - void __init add_dtb(u64 data) { initial_dtb = data + offsetof(struct setup_data, data); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index afbecff161d1..a2d8a3908670 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -109,7 +109,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, struct stack_info stack_info = {0}; unsigned long visit_mask = 0; int graph_idx = 0; - bool partial; + bool partial = false; printk("%sCall Trace:\n", log_lvl); diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 1e82f787c160..bae0d32e327b 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -243,7 +243,7 @@ static void __init intel_remapping_check(int num, int slot, int func) #define KB(x) ((x) * 1024UL) #define MB(x) (KB (KB (x))) -static size_t __init i830_tseg_size(void) +static resource_size_t __init i830_tseg_size(void) { u8 esmramc = read_pci_config_byte(0, 0, 0, I830_ESMRAMC); @@ -256,7 +256,7 @@ static size_t __init i830_tseg_size(void) return KB(512); } -static size_t __init i845_tseg_size(void) +static resource_size_t __init i845_tseg_size(void) { u8 esmramc = read_pci_config_byte(0, 0, 0, I845_ESMRAMC); u8 tseg_size = esmramc & I845_TSEG_SIZE_MASK; @@ -273,7 +273,7 @@ static size_t __init i845_tseg_size(void) return 0; } -static size_t __init i85x_tseg_size(void) +static resource_size_t __init i85x_tseg_size(void) { u8 esmramc = read_pci_config_byte(0, 0, 0, I85X_ESMRAMC); @@ -283,12 +283,12 @@ static size_t __init i85x_tseg_size(void) return MB(1); } -static size_t __init i830_mem_size(void) +static resource_size_t __init i830_mem_size(void) { return read_pci_config_byte(0, 0, 0, I830_DRB3) * MB(32); } -static size_t __init i85x_mem_size(void) +static resource_size_t __init i85x_mem_size(void) { return read_pci_config_byte(0, 0, 1, I85X_DRB3) * MB(32); } @@ -297,36 +297,36 @@ static size_t __init i85x_mem_size(void) * On 830/845/85x the stolen memory base isn't available in any * register. We need to calculate it as TOM-TSEG_SIZE-stolen_size. */ -static phys_addr_t __init i830_stolen_base(int num, int slot, int func, - size_t stolen_size) +static resource_size_t __init i830_stolen_base(int num, int slot, int func, + resource_size_t stolen_size) { - return (phys_addr_t)i830_mem_size() - i830_tseg_size() - stolen_size; + return i830_mem_size() - i830_tseg_size() - stolen_size; } -static phys_addr_t __init i845_stolen_base(int num, int slot, int func, - size_t stolen_size) +static resource_size_t __init i845_stolen_base(int num, int slot, int func, + resource_size_t stolen_size) { - return (phys_addr_t)i830_mem_size() - i845_tseg_size() - stolen_size; + return i830_mem_size() - i845_tseg_size() - stolen_size; } -static phys_addr_t __init i85x_stolen_base(int num, int slot, int func, - size_t stolen_size) +static resource_size_t __init i85x_stolen_base(int num, int slot, int func, + resource_size_t stolen_size) { - return (phys_addr_t)i85x_mem_size() - i85x_tseg_size() - stolen_size; + return i85x_mem_size() - i85x_tseg_size() - stolen_size; } -static phys_addr_t __init i865_stolen_base(int num, int slot, int func, - size_t stolen_size) +static resource_size_t __init i865_stolen_base(int num, int slot, int func, + resource_size_t stolen_size) { u16 toud = 0; toud = read_pci_config_16(0, 0, 0, I865_TOUD); - return (phys_addr_t)(toud << 16) + i845_tseg_size(); + return toud * KB(64) + i845_tseg_size(); } -static phys_addr_t __init gen3_stolen_base(int num, int slot, int func, - size_t stolen_size) +static resource_size_t __init gen3_stolen_base(int num, int slot, int func, + resource_size_t stolen_size) { u32 bsm; @@ -337,10 +337,10 @@ static phys_addr_t __init gen3_stolen_base(int num, int slot, int func, */ bsm = read_pci_config(num, slot, func, INTEL_BSM); - return (phys_addr_t)bsm & INTEL_BSM_MASK; + return bsm & INTEL_BSM_MASK; } -static size_t __init i830_stolen_size(int num, int slot, int func) +static resource_size_t __init i830_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; u16 gms; @@ -361,7 +361,7 @@ static size_t __init i830_stolen_size(int num, int slot, int func) return 0; } -static size_t __init gen3_stolen_size(int num, int slot, int func) +static resource_size_t __init gen3_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; u16 gms; @@ -390,7 +390,7 @@ static size_t __init gen3_stolen_size(int num, int slot, int func) return 0; } -static size_t __init gen6_stolen_size(int num, int slot, int func) +static resource_size_t __init gen6_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; u16 gms; @@ -398,10 +398,10 @@ static size_t __init gen6_stolen_size(int num, int slot, int func) gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL); gms = (gmch_ctrl >> SNB_GMCH_GMS_SHIFT) & SNB_GMCH_GMS_MASK; - return (size_t)gms * MB(32); + return gms * MB(32); } -static size_t __init gen8_stolen_size(int num, int slot, int func) +static resource_size_t __init gen8_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; u16 gms; @@ -409,10 +409,10 @@ static size_t __init gen8_stolen_size(int num, int slot, int func) gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL); gms = (gmch_ctrl >> BDW_GMCH_GMS_SHIFT) & BDW_GMCH_GMS_MASK; - return (size_t)gms * MB(32); + return gms * MB(32); } -static size_t __init chv_stolen_size(int num, int slot, int func) +static resource_size_t __init chv_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; u16 gms; @@ -426,14 +426,14 @@ static size_t __init chv_stolen_size(int num, int slot, int func) * 0x17 to 0x1d: 4MB increments start at 36MB */ if (gms < 0x11) - return (size_t)gms * MB(32); + return gms * MB(32); else if (gms < 0x17) - return (size_t)(gms - 0x11 + 2) * MB(4); + return (gms - 0x11) * MB(4) + MB(8); else - return (size_t)(gms - 0x17 + 9) * MB(4); + return (gms - 0x17) * MB(4) + MB(36); } -static size_t __init gen9_stolen_size(int num, int slot, int func) +static resource_size_t __init gen9_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; u16 gms; @@ -444,14 +444,15 @@ static size_t __init gen9_stolen_size(int num, int slot, int func) /* 0x0 to 0xef: 32MB increments starting at 0MB */ /* 0xf0 to 0xfe: 4MB increments starting at 4MB */ if (gms < 0xf0) - return (size_t)gms * MB(32); + return gms * MB(32); else - return (size_t)(gms - 0xf0 + 1) * MB(4); + return (gms - 0xf0) * MB(4) + MB(4); } struct intel_early_ops { - size_t (*stolen_size)(int num, int slot, int func); - phys_addr_t (*stolen_base)(int num, int slot, int func, size_t size); + resource_size_t (*stolen_size)(int num, int slot, int func); + resource_size_t (*stolen_base)(int num, int slot, int func, + resource_size_t size); }; static const struct intel_early_ops i830_early_ops __initconst = { @@ -527,16 +528,20 @@ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_SKL_IDS(&gen9_early_ops), INTEL_BXT_IDS(&gen9_early_ops), INTEL_KBL_IDS(&gen9_early_ops), + INTEL_CFL_IDS(&gen9_early_ops), INTEL_GLK_IDS(&gen9_early_ops), INTEL_CNL_IDS(&gen9_early_ops), }; +struct resource intel_graphics_stolen_res __ro_after_init = DEFINE_RES_MEM(0, 0); +EXPORT_SYMBOL(intel_graphics_stolen_res); + static void __init intel_graphics_stolen(int num, int slot, int func, const struct intel_early_ops *early_ops) { - phys_addr_t base, end; - size_t size; + resource_size_t base, size; + resource_size_t end; size = early_ops->stolen_size(num, slot, func); base = early_ops->stolen_base(num, slot, func, size); @@ -545,8 +550,12 @@ intel_graphics_stolen(int num, int slot, int func, return; end = base + size - 1; - printk(KERN_INFO "Reserving Intel graphics memory at %pa-%pa\n", - &base, &end); + + intel_graphics_stolen_res.start = base; + intel_graphics_stolen_res.end = end; + + printk(KERN_INFO "Reserving Intel graphics memory at %pR\n", + &intel_graphics_stolen_res); /* Mark this space as reserved */ e820__range_add(base, size, E820_TYPE_RESERVED); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 68e1867cca80..45fb4d2565f8 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -142,6 +142,15 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_puts(p, " Hypervisor callback interrupts\n"); } #endif +#if IS_ENABLED(CONFIG_HYPERV) + if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) { + seq_printf(p, "%*s: ", prec, "HRE"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + irq_stats(j)->irq_hv_reenlightenment_count); + seq_puts(p, " Hyper-V reenlightenment interrupts\n"); + } +#endif seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index b40ffbf156c1..4e37d1a851a6 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -498,6 +498,34 @@ static void __init kvm_apf_trap_init(void) update_intr_gate(X86_TRAP_PF, async_page_fault); } +static DEFINE_PER_CPU(cpumask_var_t, __pv_tlb_mask); + +static void kvm_flush_tlb_others(const struct cpumask *cpumask, + const struct flush_tlb_info *info) +{ + u8 state; + int cpu; + struct kvm_steal_time *src; + struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_tlb_mask); + + cpumask_copy(flushmask, cpumask); + /* + * We have to call flush only on online vCPUs. And + * queue flush_on_enter for pre-empted vCPUs + */ + for_each_cpu(cpu, flushmask) { + src = &per_cpu(steal_time, cpu); + state = READ_ONCE(src->preempted); + if ((state & KVM_VCPU_PREEMPTED)) { + if (try_cmpxchg(&src->preempted, &state, + state | KVM_VCPU_FLUSH_TLB)) + __cpumask_clear_cpu(cpu, flushmask); + } + } + + native_flush_tlb_others(flushmask, info); +} + static void __init kvm_guest_init(void) { int i; @@ -517,6 +545,9 @@ static void __init kvm_guest_init(void) pv_time_ops.steal_clock = kvm_steal_clock; } + if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) + pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; + if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) apic_set_eoi_write(kvm_guest_apic_eoi_write); @@ -598,6 +629,22 @@ static __init int activate_jump_labels(void) } arch_initcall(activate_jump_labels); +static __init int kvm_setup_pv_tlb_flush(void) +{ + int cpu; + + if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) { + for_each_possible_cpu(cpu) { + zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), + GFP_KERNEL, cpu_to_node(cpu)); + } + pr_info("KVM setup pv remote TLB flush\n"); + } + + return 0; +} +arch_initcall(kvm_setup_pv_tlb_flush); + #ifdef CONFIG_PARAVIRT_SPINLOCKS /* Kick a cpu by its apicid. Used to wake up a halted vcpu */ @@ -643,7 +690,7 @@ __visible bool __kvm_vcpu_is_preempted(long cpu) { struct kvm_steal_time *src = &per_cpu(steal_time, cpu); - return !!src->preempted; + return !!(src->preempted & KVM_VCPU_PREEMPTED); } PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c75466232016..9eb448c7859d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -557,7 +557,7 @@ static void __set_personality_x32(void) * Pretend to come from a x32 execve. */ task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT; - current->thread.status &= ~TS_COMPAT; + current_thread_info()->status &= ~TS_COMPAT; #endif } @@ -571,7 +571,7 @@ static void __set_personality_ia32(void) current->personality |= force_personality32; /* Prepare the first "return" to user space */ task_pt_regs(current)->orig_ax = __NR_ia32_execve; - current->thread.status |= TS_COMPAT; + current_thread_info()->status |= TS_COMPAT; #endif } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index f37d18124648..ed5c4cdf0a34 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -935,7 +935,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) */ regs->orig_ax = value; if (syscall_get_nr(child, regs) >= 0) - child->thread.status |= TS_I386_REGS_POKED; + child->thread_info.status |= TS_I386_REGS_POKED; break; case offsetof(struct user32, regs.eflags): diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 307d3bac5f04..11eda21eb697 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -68,6 +68,9 @@ relocate_kernel: movq %cr4, %rax movq %rax, CR4(%r11) + /* Save CR4. Required to enable the right paging mode later. */ + movq %rax, %r13 + /* zero out flags, and disable interrupts */ pushq $0 popfq @@ -126,8 +129,13 @@ identity_mapped: /* * Set cr4 to a known state: * - physical address extension enabled + * - 5-level paging, if it was enabled before */ movl $X86_CR4_PAE, %eax + testq $X86_CR4_LA57, %r13 + jz 1f + orl $X86_CR4_LA57, %eax +1: movq %rax, %cr4 jmp 1f diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b9e00e8f1c9b..4cdc0b27ec82 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -787,7 +787,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) * than the tracee. */ #ifdef CONFIG_IA32_EMULATION - if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) + if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI |