Diffstat (limited to 'arch/x86/kernel')
44 files changed, 460 insertions, 337 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 1c38174b5f01..21b542a6866c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -146,7 +146,11 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) pr_debug("Local APIC address 0x%08x\n", madt->address); } - if (madt->header.revision >= 5) + + /* ACPI 6.3 and newer support the online capable bit. */ + if (acpi_gbl_FADT.header.revision > 6 || + (acpi_gbl_FADT.header.revision == 6 && + acpi_gbl_FADT.minor_revision >= 3)) acpi_support_online_capable = true; default_acpi_madt_oem_check(madt->header.oem_id, @@ -193,7 +197,8 @@ static bool __init acpi_is_processor_usable(u32 lapic_flags) if (lapic_flags & ACPI_MADT_ENABLED) return true; - if (acpi_support_online_capable && (lapic_flags & ACPI_MADT_ONLINE_CAPABLE)) + if (!acpi_support_online_capable || + (lapic_flags & ACPI_MADT_ONLINE_CAPABLE)) return true; return false; @@ -1853,13 +1858,18 @@ early_param("acpi_sci", setup_acpi_sci); int __acpi_acquire_global_lock(unsigned int *lock) { - unsigned int old, new; + unsigned int old, new, val; old = READ_ONCE(*lock); do { - new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1)); + val = (old >> 1) & 0x1; + new = (old & ~0x3) + 2 + val; } while (!try_cmpxchg(lock, &old, new)); - return ((new & 0x3) < 3) ? -1 : 0; + + if (val) + return 0; + + return -1; } int __acpi_release_global_lock(unsigned int *lock) diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 3b7f4cdbf2e0..1328c221af30 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -111,13 +111,26 @@ int x86_acpi_suspend_lowlevel(void) saved_magic = 0x12345678; #else /* CONFIG_64BIT */ #ifdef CONFIG_SMP - initial_stack = (unsigned long)temp_stack + sizeof(temp_stack); - early_gdt_descr.address = - (unsigned long)get_cpu_gdt_rw(smp_processor_id()); - initial_gs = per_cpu_offset(smp_processor_id()); + /* + * As each CPU starts up, it will find its own stack pointer + * from its current_task->thread.sp. Typically that will be + * the idle thread for a newly-started AP, or even the boot + * CPU which will find it set to &init_task in the static + * per-cpu data. + * + * Make the resuming CPU use the temporary stack at startup + * by setting current->thread.sp to point to that. The true + * %rsp will be restored with the rest of the CPU context, + * by do_suspend_lowlevel(). And unwinders don't care about + * the abuse of ->thread.sp because it's a dead variable + * while the thread is running on the CPU anyway; the true + * value is in the actual %rsp register. 
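[Editor's note] The __acpi_acquire_global_lock() rewrite above keeps the FACS global-lock algorithm (bit 0 = pending, bit 1 = owned) but makes the owned-bit test explicit. A minimal user-space model of that algorithm, with __sync_val_compare_and_swap() standing in for the kernel's try_cmpxchg(); unlike the kernel function (which returns -1 for "acquired" and 0 for "pending set"), this sketch returns 1/0:

/* Sketch of the ACPI global-lock acquire: set the owned bit, and if the
 * lock was already owned set the pending bit instead and report that the
 * caller has to wait for a release event.
 */
#include <stdio.h>

static int model_acquire_global_lock(unsigned int *lock)
{
	unsigned int old, new, prev, was_owned;

	old = *lock;
	do {
		was_owned = (old >> 1) & 0x1;		/* bit 1: already owned? */
		new = (old & ~0x3) + 2 + was_owned;	/* set owned; set pending if it was owned */
		prev = __sync_val_compare_and_swap(lock, old, new);
		if (prev == old)
			break;
		old = prev;
	} while (1);

	return was_owned ? 0 : 1;	/* 0: must wait, 1: acquired */
}

int main(void)
{
	unsigned int lock = 0;

	printf("first acquire:  %d (lock=%#x)\n", model_acquire_global_lock(&lock), lock);
	printf("second acquire: %d (lock=%#x)\n", model_acquire_global_lock(&lock), lock);
	return 0;
}

The second call leaves the lock word at 0x3 (owned + pending), which is exactly the state the original "(new & 0x3) < 3" test was distinguishing.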
+ */ + current->thread.sp = (unsigned long)temp_stack + sizeof(temp_stack); + smpboot_control = smp_processor_id(); #endif initial_code = (unsigned long)wakeup_long64; - saved_magic = 0x123456789abcdef0L; + saved_magic = 0x123456789abcdef0L; #endif /* CONFIG_64BIT */ /* diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 20d9a604da7c..770557110051 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -422,10 +422,9 @@ static unsigned int reserve_eilvt_offset(int offset, unsigned int new) if (vector && !eilvt_entry_is_changeable(vector, new)) /* may not change if vectors are different */ return rsvd; - rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new); - } while (rsvd != new); + } while (!atomic_try_cmpxchg(&eilvt_offsets[offset], &rsvd, new)); - rsvd &= ~APIC_EILVT_MASKED; + rsvd = new & ~APIC_EILVT_MASKED; if (rsvd && rsvd != vector) pr_info("LVT offset %d assigned for vector 0x%02x\n", offset, rsvd); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1f83b052bb74..4241dc243aa8 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -66,6 +66,7 @@ #include <asm/hw_irq.h> #include <asm/apic.h> #include <asm/pgtable.h> +#include <asm/x86_init.h> #define for_each_ioapic(idx) \ for ((idx) = 0; (idx) < nr_ioapics; (idx)++) @@ -2477,17 +2478,21 @@ static int io_apic_get_redir_entries(int ioapic) unsigned int arch_dynirq_lower_bound(unsigned int from) { + unsigned int ret; + /* * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use * gsi_top if ioapic_dynirq_base hasn't been initialized yet. */ - if (!ioapic_initialized) - return gsi_top; + ret = ioapic_dynirq_base ? : gsi_top; + /* - * For DT enabled machines ioapic_dynirq_base is irrelevant and not - * updated. So simply return @from if ioapic_dynirq_base == 0. + * For DT enabled machines ioapic_dynirq_base is irrelevant and + * always 0. gsi_top can be 0 if there is no IO/APIC registered. + * 0 is an invalid interrupt number for dynamic allocations. Return + * @from instead. */ - return ioapic_dynirq_base ? : from; + return ret ? 
: from; } #ifdef CONFIG_X86_32 @@ -2680,10 +2685,15 @@ static void io_apic_set_fixmap(enum fixed_addresses idx, phys_addr_t phys) pgprot_t flags = FIXMAP_PAGE_NOCACHE; /* - * Ensure fixmaps for IOAPIC MMIO respect memory encryption pgprot + * Ensure fixmaps for IO-APIC MMIO respect memory encryption pgprot * bits, just like normal ioremap(): */ - flags = pgprot_decrypted(flags); + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { + if (x86_platform.hyper.is_private_mmio(phys)) + flags = pgprot_encrypted(flags); + else + flags = pgprot_decrypted(flags); + } __set_fixmap(idx, phys, flags); } diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index e696e22d0531..b2b2b7f3e03f 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -9,11 +9,7 @@ #include "local.h" -struct cluster_mask { - unsigned int clusterid; - int node; - struct cpumask mask; -}; +#define apic_cluster(apicid) ((apicid) >> 4) /* * __x2apic_send_IPI_mask() possibly needs to read @@ -23,8 +19,7 @@ struct cluster_mask { static u32 *x86_cpu_to_logical_apicid __read_mostly; static DEFINE_PER_CPU(cpumask_var_t, ipi_mask); -static DEFINE_PER_CPU_READ_MOSTLY(struct cluster_mask *, cluster_masks); -static struct cluster_mask *cluster_hotplug_mask; +static DEFINE_PER_CPU_READ_MOSTLY(struct cpumask *, cluster_masks); static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { @@ -60,10 +55,10 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) /* Collapse cpus in a cluster so a single IPI per cluster is sent */ for_each_cpu(cpu, tmpmsk) { - struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu); + struct cpumask *cmsk = per_cpu(cluster_masks, cpu); dest = 0; - for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask) + for_each_cpu_and(clustercpu, tmpmsk, cmsk) dest |= x86_cpu_to_logical_apicid[clustercpu]; if (!dest) @@ -71,7 +66,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL); /* Remove cluster CPUs from tmpmask */ - cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask); + cpumask_andnot(tmpmsk, tmpmsk, cmsk); } local_irq_restore(flags); @@ -105,55 +100,98 @@ static u32 x2apic_calc_apicid(unsigned int cpu) static void init_x2apic_ldr(void) { - struct cluster_mask *cmsk = this_cpu_read(cluster_masks); - u32 cluster, apicid = apic_read(APIC_LDR); - unsigned int cpu; + struct cpumask *cmsk = this_cpu_read(cluster_masks); - x86_cpu_to_logical_apicid[smp_processor_id()] = apicid; + BUG_ON(!cmsk); - if (cmsk) - goto update; - - cluster = apicid >> 16; - for_each_online_cpu(cpu) { - cmsk = per_cpu(cluster_masks, cpu); - /* Matching cluster found. Link and update it. */ - if (cmsk && cmsk->clusterid == cluster) - goto update; + cpumask_set_cpu(smp_processor_id(), cmsk); +} + +/* + * As an optimisation during boot, set the cluster_mask for all present + * CPUs at once, to prevent each of them having to iterate over the others + * to find the existing cluster_mask. 
+ */ +static void prefill_clustermask(struct cpumask *cmsk, unsigned int cpu, u32 cluster) +{ + int cpu_i; + + for_each_present_cpu(cpu_i) { + struct cpumask **cpu_cmsk = &per_cpu(cluster_masks, cpu_i); + u32 apicid = apic->cpu_present_to_apicid(cpu_i); + + if (apicid == BAD_APICID || cpu_i == cpu || apic_cluster(apicid) != cluster) + continue; + + if (WARN_ON_ONCE(*cpu_cmsk == cmsk)) + continue; + + BUG_ON(*cpu_cmsk); + *cpu_cmsk = cmsk; } - cmsk = cluster_hotplug_mask; - cmsk->clusterid = cluster; - cluster_hotplug_mask = NULL; -update: - this_cpu_write(cluster_masks, cmsk); - cpumask_set_cpu(smp_processor_id(), &cmsk->mask); } -static int alloc_clustermask(unsigned int cpu, int node) +static int alloc_clustermask(unsigned int cpu, u32 cluster, int node) { + struct cpumask *cmsk = NULL; + unsigned int cpu_i; + + /* + * At boot time, the CPU present mask is stable. The cluster mask is + * allocated for the first CPU in the cluster and propagated to all + * present siblings in the cluster. If the cluster mask is already set + * on entry to this function for a given CPU, there is nothing to do. + */ if (per_cpu(cluster_masks, cpu)) return 0; + + if (system_state < SYSTEM_RUNNING) + goto alloc; + /* - * If a hotplug spare mask exists, check whether it's on the right - * node. If not, free it and allocate a new one. + * On post boot hotplug for a CPU which was not present at boot time, + * iterate over all possible CPUs (even those which are not present + * any more) to find any existing cluster mask. */ - if (cluster_hotplug_mask) { - if (cluster_hotplug_mask->node == node) - return 0; - kfree(cluster_hotplug_mask); + for_each_possible_cpu(cpu_i) { + u32 apicid = apic->cpu_present_to_apicid(cpu_i); + + if (apicid != BAD_APICID && apic_cluster(apicid) == cluster) { + cmsk = per_cpu(cluster_masks, cpu_i); + /* + * If the cluster is already initialized, just store + * the mask and return. There's no need to propagate. + */ + if (cmsk) { + per_cpu(cluster_masks, cpu) = cmsk; + return 0; + } + } } - - cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask), - GFP_KERNEL, node); - if (!cluster_hotplug_mask) + /* + * No CPU in the cluster has ever been initialized, so fall through to + * the boot time code which will also populate the cluster mask for any + * other CPU in the cluster which is (now) present. 
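[Editor's note] The x2apic_cluster.c rework above replaces the cluster_mask structure with plain cpumasks keyed by apic_cluster(apicid), i.e. the physical APIC ID shifted right by four; x2apic_prepare_cpu() further down builds the logical ID as (cluster << 16) | (1 << (apicid & 0xf)), and __x2apic_send_IPI_mask() ORs those logical IDs so one IPI covers a whole cluster. A stand-alone illustration of that collapsing (the APIC IDs are made up):

/* Logical x2APIC ID construction and per-cluster IPI destination collapsing,
 * following the bit layout in the hunks above: cluster number in bits 31:16,
 * a one-hot CPU bit in bits 15:0.
 */
#include <stdio.h>

#define APIC_CLUSTER(apicid)	((apicid) >> 4)

static unsigned int logical_apicid(unsigned int phys_apicid)
{
	return (APIC_CLUSTER(phys_apicid) << 16) | (1u << (phys_apicid & 0xf));
}

int main(void)
{
	/* Hypothetical physical x2APIC IDs: three CPUs in cluster 0, two in cluster 1. */
	unsigned int cpus[] = { 0x00, 0x01, 0x03, 0x10, 0x12 };
	unsigned int n = sizeof(cpus) / sizeof(cpus[0]), i, j;

	for (i = 0; i < n; i++) {
		unsigned int dest = 0;

		/* Collapse every CPU sharing this CPU's cluster into one destination. */
		for (j = 0; j < n; j++)
			if (APIC_CLUSTER(cpus[j]) == APIC_CLUSTER(cpus[i]))
				dest |= logical_apicid(cpus[j]);

		printf("APIC %#04x: logical %#010x, cluster-wide dest %#010x\n",
		       cpus[i], logical_apicid(cpus[i]), dest);
	}
	return 0;
}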
+ */ +alloc: + cmsk = kzalloc_node(sizeof(*cmsk), GFP_KERNEL, node); + if (!cmsk) return -ENOMEM; - cluster_hotplug_mask->node = node; + per_cpu(cluster_masks, cpu) = cmsk; + prefill_clustermask(cmsk, cpu, cluster); + return 0; } static int x2apic_prepare_cpu(unsigned int cpu) { - if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0) + u32 phys_apicid = apic->cpu_present_to_apicid(cpu); + u32 cluster = apic_cluster(phys_apicid); + u32 logical_apicid = (cluster << 16) | (1 << (phys_apicid & 0xf)); + + x86_cpu_to_logical_apicid[cpu] = logical_apicid; + + if (alloc_clustermask(cpu, cluster, cpu_to_node(cpu)) < 0) return -ENOMEM; if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) return -ENOMEM; @@ -162,10 +200,10 @@ static int x2apic_prepare_cpu(unsigned int cpu) static int x2apic_dead_cpu(unsigned int dead_cpu) { - struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu); + struct cpumask *cmsk = per_cpu(cluster_masks, dead_cpu); if (cmsk) - cpumask_clear_cpu(dead_cpu, &cmsk->mask); + cpumask_clear_cpu(dead_cpu, cmsk); free_cpumask_var(per_cpu(ipi_mask, dead_cpu)); return 0; } diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 283dcd2f62c8..dc3576303f1a 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -115,6 +115,7 @@ static void __used common(void) OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack); + OFFSET(X86_current_task, pcpu_hot, current_task); #ifdef CONFIG_CALL_DEPTH_TRACKING OFFSET(X86_call_depth, pcpu_hot, call_depth); #endif diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index ffea98f9064b..22ab13966427 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -330,8 +330,8 @@ void noinline callthunks_patch_module_calls(struct callthunk_sites *cs, struct module *mod) { struct core_text ct = { - .base = (unsigned long)mod->core_layout.base, - .end = (unsigned long)mod->core_layout.base + mod->core_layout.size, + .base = (unsigned long)mod->mem[MOD_TEXT].base, + .end = (unsigned long)mod->mem[MOD_TEXT].base + mod->mem[MOD_TEXT].size, .name = mod->name, }; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 95cdd08c4cbb..571abf808ea3 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -929,6 +929,10 @@ static void init_amd(struct cpuinfo_x86 *c) if (c->x86 >= 0x10) set_cpu_cap(c, X86_FEATURE_REP_GOOD); + /* AMD FSRM also implies FSRS */ + if (cpu_has(c, X86_FEATURE_FSRM)) + set_cpu_cap(c, X86_FEATURE_FSRS); + /* get apicid instead of initial apic id from cpuid */ c->apicid = hard_smp_processor_id(); @@ -1005,6 +1009,17 @@ static void init_amd(struct cpuinfo_x86 *c) msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); check_null_seg_clears_base(c); + + /* + * Make sure EFER[AIBRSE - Automatic IBRS Enable] is set. The APs are brought up + * using the trampoline code and as part of it, MSR_EFER gets prepared there in + * order to be replicated onto them. Regardless, set it here again, if not set, + * to protect against any future refactoring/code reorganization which might + * miss setting this important bit. 
+ */ + if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && + cpu_has(c, X86_FEATURE_AUTOIBRS)) + WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS)); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f9d060e71c3e..182af64387d0 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -784,8 +784,7 @@ static int __init nospectre_v1_cmdline(char *str) } early_param("nospectre_v1", nospectre_v1_cmdline); -static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = - SPECTRE_V2_NONE; +enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; #undef pr_fmt #define pr_fmt(fmt) "RETBleed: " fmt @@ -1133,13 +1132,6 @@ spectre_v2_parse_user_cmdline(void) return SPECTRE_V2_USER_CMD_AUTO; } -static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) -{ - return mode == SPECTRE_V2_EIBRS || - mode == SPECTRE_V2_EIBRS_RETPOLINE || - mode == SPECTRE_V2_EIBRS_LFENCE; -} - static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) { return spectre_v2_in_eibrs_mode(mode) || mode == SPECTRE_V2_IBRS; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8cd4126d8253..80710a68ef7d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -121,6 +121,7 @@ static const struct x86_cpu_id ppin_cpuids[] = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &ppin_info[X86_VENDOR_INTEL]), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &ppin_info[X86_VENDOR_INTEL]), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &ppin_info[X86_VENDOR_INTEL]), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &ppin_info[X86_VENDOR_INTEL]), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &ppin_info[X86_VENDOR_INTEL]), diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 57a5349e6954..f97b0fe13da8 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -83,4 +83,12 @@ unsigned int aperfmperf_get_khz(int cpu); extern void x86_spec_ctrl_setup_ap(void); extern void update_srbds_msr(void); +extern enum spectre_v2_mitigation spectre_v2_enabled; + +static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) +{ + return mode == SPECTRE_V2_EIBRS || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_EIBRS_LFENCE; +} #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 291d4167fab8..1c4639588ff9 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1177,7 +1177,7 @@ static const struct { static struct ratelimit_state bld_ratelimit; static unsigned int sysctl_sld_mitigate = 1; -static DEFINE_SEMAPHORE(buslock_sem); +static DEFINE_SEMAPHORE(buslock_sem, 1); #ifdef CONFIG_PROC_SYSCTL static struct ctl_table sld_sysctls[] = { @@ -1451,31 +1451,13 @@ void handle_bus_lock(struct pt_regs *regs) } /* - * Bits in the IA32_CORE_CAPABILITIES are not architectural, so they should - * only be trusted if it is confirmed that a CPU model implements a - * specific feature at a particular bit position. - * - * The possible driver data field values: - * - * - 0: CPU models that are known to have the per-core split-lock detection - * feature even though they do not enumerate IA32_CORE_CAPABILITIES. - * - * - 1: CPU models which may enumerate IA32_CORE_CAPABILITIES and if so use - * bit 5 to enumerate the per-core split-lock detection feature. 
+ * CPU models that are known to have the per-core split-lock detection + * feature even though they do not enumerate IA32_CORE_CAPABILITIES. */ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, 1), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, 1), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, 1), - X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, 1), - X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1), - X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 1), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 1), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 1), - X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, 1), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0), {} }; @@ -1487,24 +1469,27 @@ static void __init split_lock_setup(struct cpuinfo_x86 *c) if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) return; + /* Check for CPUs that have support but do not enumerate it: */ m = x86_match_cpu(split_lock_cpu_ids); - if (!m) - return; + if (m) + goto supported; - switch (m->driver_data) { - case 0: - break; - case 1: - if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) - return; - rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps); - if (!(ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)) - return; - break; - default: + if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) return; - } + /* + * Not all bits in MSR_IA32_CORE_CAPS are architectural, but + * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is. All CPUs that set + * it have split lock detection. + */ + rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps); + if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT) + goto supported; + + /* CPU is not in the model list and does not have the MSR bit: */ + return; + +supported: cpu_model_supports_sld = true; __split_lock_setup(); } diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 23c5072fbbb7..0b971f974096 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -235,10 +235,10 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); * A list of the banks enabled on each logical CPU. Controls which respective * descriptors to initialize later in mce_threshold_create_device(). */ -static DEFINE_PER_CPU(unsigned int, bank_map); +static DEFINE_PER_CPU(u64, bank_map); /* Map of banks that have more than MCA_MISC0 available. 
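[Editor's note] The mce/amd.c hunk above widens bank_map and smca_misc_banks_map from 32-bit masks to u64, and the hunks that follow switch BIT() to BIT_ULL() to match. The point is that a 32-bit mask (and a plain "1 << bank") cannot describe banks numbered 32 and above; a short demonstration with an example bank number:

/* Why bank_map had to become u64: the old code kept the mask in an unsigned
 * int and built bits with (1 << bank), which breaks once a system reports
 * banks >= 32. 1ULL << bank keeps working up to bank 63. Bank 40 is just an
 * example value.
 */
#include <stdio.h>
#include <stdint.h>

#define BIT_ULL(n)	(1ULL << (n))

int main(void)
{
	uint64_t bank_map = 0;
	unsigned int bank = 40;

	bank_map |= BIT_ULL(bank);

	printf("bank %u present in 64-bit map: %s\n", bank,
	       (bank_map & BIT_ULL(bank)) ? "yes" : "no");
	printf("an unsigned int mask covers banks 0-%zu only\n",
	       8 * sizeof(unsigned int) - 1);
	return 0;
}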
*/ -static DEFINE_PER_CPU(u32, smca_misc_banks_map); +static DEFINE_PER_CPU(u64, smca_misc_banks_map); static void amd_threshold_interrupt(void); static void amd_deferred_error_interrupt(void); @@ -267,7 +267,7 @@ static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu) return; if (low & MASK_BLKPTR_LO) - per_cpu(smca_misc_banks_map, cpu) |= BIT(bank); + per_cpu(smca_misc_banks_map, cpu) |= BIT_ULL(bank); } @@ -530,7 +530,7 @@ static u32 smca_get_block_address(unsigned int bank, unsigned int block, if (!block) return MSR_AMD64_SMCA_MCx_MISC(bank); - if (!(per_cpu(smca_misc_banks_map, cpu) & BIT(bank))) + if (!(per_cpu(smca_misc_banks_map, cpu) & BIT_ULL(bank))) return 0; return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); @@ -574,7 +574,7 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, int new; if (!block) - per_cpu(bank_map, cpu) |= (1 << bank); + per_cpu(bank_map, cpu) |= BIT_ULL(bank); memset(&b, 0, sizeof(b)); b.cpu = cpu; @@ -878,7 +878,7 @@ static void amd_threshold_interrupt(void) return; for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { - if (!(per_cpu(bank_map, cpu) & (1 << bank))) + if (!(per_cpu(bank_map, cpu) & BIT_ULL(bank))) continue; first_block = bp[bank]->blocks; @@ -1029,7 +1029,7 @@ static const struct sysfs_ops threshold_ops = { static void threshold_block_release(struct kobject *kobj); -static struct kobj_type threshold_ktype = { +static const struct kobj_type threshold_ktype = { .sysfs_ops = &threshold_ops, .default_groups = default_groups, .release = threshold_block_release, @@ -1356,7 +1356,7 @@ int mce_threshold_create_device(unsigned int cpu) return -ENOMEM; for (bank = 0; bank < numbanks; ++bank) { - if (!(this_cpu_read(bank_map) & (1 << bank))) + if (!(this_cpu_read(bank_map) & BIT_ULL(bank))) continue; err = threshold_create_bank(bp, cpu, bank); if (err) { diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 91a415553c27..d2412ce2d312 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -244,11 +244,11 @@ noinstr void pentium_machine_check(struct pt_regs *regs); noinstr void winchip_machine_check(struct pt_regs *regs); static inline void enable_p5_mce(void) { mce_p5_enabled = 1; } #else -static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} -static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} -static inline void enable_p5_mce(void) {} -static inline void pentium_machine_check(struct pt_regs *regs) {} -static inline void winchip_machine_check(struct pt_regs *regs) {} +static __always_inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} +static __always_inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} +static __always_inline void enable_p5_mce(void) {} +static __always_inline void pentium_machine_check(struct pt_regs *regs) {} +static __always_inline void winchip_machine_check(struct pt_regs *regs) {} #endif noinstr u64 mce_rdmsrl(u32 msr); diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 9eb457b10341..f5fdeb1e3606 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -61,7 +61,7 @@ static u8 amd_ucode_patch[MAX_NUMNODES][PATCH_MAX_SIZE]; /* * Microcode patch container file is prepended to the initrd in cpio - * format. See Documentation/x86/microcode.rst + * format. 
See Documentation/arch/x86/microcode.rst */ static const char ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin"; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 7a329e561354..3afcf3de0dd4 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -632,6 +632,7 @@ static const struct attribute_group cpu_root_microcode_group = { static int __init microcode_init(void) { + struct device *dev_root; struct cpuinfo_x86 *c = &boot_cpu_data; int error; @@ -652,10 +653,14 @@ static int __init microcode_init(void) if (IS_ERR(microcode_pdev)) return PTR_ERR(microcode_pdev); - error = sysfs_create_group(&cpu_subsys.dev_root->kobj, &cpu_root_microcode_group); - if (error) { - pr_err("Error creating microcode group!\n"); - goto out_pdev; + dev_root = bus_get_dev_root(&cpu_subsys); + if (dev_root) { + error = sysfs_create_group(&dev_root->kobj, &cpu_root_microcode_group); + put_device(dev_root); + if (error) { + pr_err("Error creating microcode group!\n"); + goto out_pdev; + } } /* Do per-CPU setup */ diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index f36dc2f796c5..c7969e806c64 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -18,7 +18,6 @@ #include <linux/kexec.h> #include <linux/i8253.h> #include <linux/random.h> -#include <linux/swiotlb.h> #include <asm/processor.h> #include <asm/hypervisor.h> #include <asm/hyperv-tlfs.h> @@ -33,7 +32,6 @@ #include <asm/nmi.h> #include <clocksource/hyperv_timer.h> #include <asm/numa.h> -#include <asm/coco.h> /* Is Linux running as the root partition? */ bool hv_root_partition; @@ -250,11 +248,6 @@ static uint32_t __init ms_hyperv_platform(void) return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS; } -static unsigned char hv_get_nmi_reason(void) -{ - return 0; -} - #ifdef CONFIG_X86_LOCAL_APIC /* * Prior to WS2016 Debug-VM sends NMIs to all CPUs which makes @@ -358,12 +351,16 @@ static void __init ms_hyperv_init_platform(void) * To mirror what Windows does we should extract CPU management * features and use the ReservedIdentityBit to detect if Linux is the * root partition. But that requires negotiating CPU management - * interface (a process to be finalized). + * interface (a process to be finalized). For now, use the privilege + * flag as the indicator for running as root. * - * For now, use the privilege flag as the indicator for running as - * root. + * Hyper-V should never specify running as root and as a Confidential + * VM. But to protect against a compromised/malicious Hyper-V trying + * to exploit root behavior to expose Confidential VM memory, ignore + * the root partition setting if also a Confidential VM. 
*/ - if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_CPU_MANAGEMENT) { + if ((ms_hyperv.priv_high & HV_CPU_MANAGEMENT) && + !(ms_hyperv.priv_high & HV_ISOLATION)) { hv_root_partition = true; pr_info("Hyper-V: running as root partition\n"); } @@ -397,23 +394,16 @@ static void __init ms_hyperv_init_platform(void) if (ms_hyperv.priv_high & HV_ISOLATION) { ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG); ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG); - ms_hyperv.shared_gpa_boundary = - BIT_ULL(ms_hyperv.shared_gpa_boundary_bits); + + if (ms_hyperv.shared_gpa_boundary_active) + ms_hyperv.shared_gpa_boundary = + BIT_ULL(ms_hyperv.shared_gpa_boundary_bits); pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n", ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b); - if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) { + if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) static_branch_enable(&isolation_type_snp); -#ifdef CONFIG_SWIOTLB - swiotlb_unencrypted_base = ms_hyperv.shared_gpa_boundary; -#endif - } - /* Isolation VMs are unenlightened SEV-based VMs, thus this check: */ - if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { - if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE) - cc_set_vendor(CC_VENDOR_HYPERV); - } } if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) { @@ -482,6 +472,9 @@ static void __init ms_hyperv_init_platform(void) i8253_clear_counter_on_shutdown = false; #if IS_ENABLED(CONFIG_HYPERV) + if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) || + (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP)) + hv_vtom_init(); /* * Setup the hook to get control post apic initialization. */ @@ -521,6 +514,7 @@ static void __init ms_hyperv_init_platform(void) /* Register Hyper-V specific clocksource */ hv_init_clocksource(); + hv_vtl_init_platform(); #endif /* * TSC should be marked as unstable only after Hyper-V diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 7fe51488e136..ded1fc7cb7cb 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -76,7 +76,7 @@ unsigned int resctrl_rmid_realloc_limit; #define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5)) /* - * The correction factor table is documented in Documentation/x86/resctrl.rst. + * The correction factor table is documented in Documentation/arch/x86/resctrl.rst. * If rmid > rmid threshold, MBM total and local values should be multiplied * by the correction factor. 
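[Editor's note] The resctrl CF() macro above stores each MBM correction factor as a fixed-point value scaled by 1048576 (2^20), so applying it is an integer multiply followed by a shift. A sketch of that application, assuming the 20-bit shift implied by the scale factor; the threshold and the 1.005 factor are invented for illustration:

/* Fixed-point application of an MBM correction factor: counts for RMIDs
 * above the threshold are multiplied by a factor pre-scaled by 2^20.
 */
#include <stdio.h>
#include <stdint.h>

#define CF(cf)		((unsigned long)(1048576 * (cf) + 0.5))
#define CF_SHIFT	20	/* log2(1048576) */

static uint64_t correct_mbm_count(uint64_t chunks, uint32_t rmid,
				  uint32_t rmid_threshold, unsigned long cf)
{
	if (rmid > rmid_threshold)
		chunks = (chunks * cf) >> CF_SHIFT;
	return chunks;
}

int main(void)
{
	unsigned long cf = CF(1.005);	/* hypothetical correction factor */

	printf("raw 1000000 -> corrected %llu\n",
	       (unsigned long long)correct_mbm_count(1000000, 12, 8, cf));
	return 0;
}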
* @@ -383,41 +383,36 @@ void free_rmid(u32 rmid) list_add_tail(&entry->list, &rmid_free_lru); } +static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 rmid, + enum resctrl_event_id evtid) +{ + switch (evtid) { + case QOS_L3_MBM_TOTAL_EVENT_ID: + return &d->mbm_total[rmid]; + case QOS_L3_MBM_LOCAL_EVENT_ID: + return &d->mbm_local[rmid]; + default: + return NULL; + } +} + static int __mon_event_count(u32 rmid, struct rmid_read *rr) { struct mbm_state *m; u64 tval = 0; - if (rr->first) + if (rr->first) { resctrl_arch_reset_rmid(rr->r, rr->d, rmid, rr->evtid); + m = get_mbm_state(rr->d, rmid, rr->evtid); + if (m) + memset(m, 0, sizeof(struct mbm_state)); + return 0; + } rr->err = resctrl_arch_rmid_read(rr->r, rr->d, rmid, rr->evtid, &tval); if (rr->err) return rr->err; - switch (rr->evtid) { - case QOS_L3_OCCUP_EVENT_ID: - rr->val += tval; - return 0; - case QOS_L3_MBM_TOTAL_EVENT_ID: - m = &rr->d->mbm_total[rmid]; - break; - case QOS_L3_MBM_LOCAL_EVENT_ID: - m = &rr->d->mbm_local[rmid]; - break; - default: - /* - * Code would never reach here because an invalid - * event id would fail in resctrl_arch_rmid_read(). - */ - return -EINVAL; - } - - if (rr->first) { - memset(m, 0, sizeof(struct mbm_state)); - return 0; - } - rr->val += tval; return 0; diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 524f8ff3e69c..458cb7419502 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -1580,7 +1580,7 @@ int rdt_pseudo_lock_init(void) pseudo_lock_major = ret; - pseudo_lock_class = class_create(THIS_MODULE, "pseudo_lock"); + pseudo_lock_class = class_create("pseudo_lock"); if (IS_ERR(pseudo_lock_class)) { ret = PTR_ERR(pseudo_lock_class); unregister_chrdev(pseudo_lock_major, "pseudo_lock"); diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index e5a37b6e9aa5..166692f2d501 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -892,20 +892,19 @@ static struct miscdevice sgx_dev_provision = { int sgx_set_attribute(unsigned long *allowed_attributes, unsigned int attribute_fd) { - struct file *file; + struct fd f = fdget(attribute_fd); - file = fget(attribute_fd); - if (!file) + if (!f.file) return -EINVAL; - if (file->f_op != &sgx_provision_fops) { - fput(file); + if (f.file->f_op != &sgx_provision_fops) { + fdput(f); return -EINVAL; } *allowed_attributes |= SGX_ATTR_PROVISIONKEY; - fput(file); + fdput(f); return 0; } EXPORT_SYMBOL_GPL(sgx_set_attribute); diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h index 0f2020653fba..d2dad21259a8 100644 --- a/arch/x86/kernel/cpu/sgx/sgx.h +++ b/arch/x86/kernel/cpu/sgx/sgx.h @@ -15,7 +15,7 @@ #define EREMOVE_ERROR_MESSAGE \ "EREMOVE returned %d (0x%x) and an EPC page was leaked. SGX may become unusable. " \ - "Refer to Documentation/x86/sgx.rst for more information." + "Refer to Documentation/arch/x86/sgx.rst for more information." #define SGX_MAX_EPC_SECTIONS 8 #define SGX_EEXTEND_BLOCK_SIZE 256 diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c index ec8064c0ae03..2293efd6ffa6 100644 --- a/arch/x86/kernel/cpu/umwait.c +++ b/arch/x86/kernel/cpu/umwait.c @@ -232,7 +232,11 @@ static int __init umwait_init(void) * Add umwait control interface. Ignore failure, so at least the * default values are set up in case the machine manages to boot. 
*/ - dev = cpu_subsys.dev_root; - return sysfs_create_group(&dev->kobj, &umwait_attr_group); + dev = bus_get_dev_root(&cpu_subsys); + if (dev) { + ret = sysfs_create_group(&dev->kobj, &umwait_attr_group); + put_device(dev); + } + return ret; } device_initcall(umwait_init); diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 621ba9c0f17a..bdc0d5539b57 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -154,7 +154,7 @@ static int __init cpuid_init(void) CPUID_MAJOR); return -EBUSY; } - cpuid_class = class_create(THIS_MODULE, "cpuid"); + cpuid_class = class_create("cpuid"); if (IS_ERR(cpuid_class)) { err = PTR_ERR(cpuid_class); goto out_chrdev; diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S index a0ed0e4a2c0c..0d9a14528176 100644 --- a/arch/x86/kernel/ftrace_32.S +++ b/arch/x86/kernel/ftrace_32.S @@ -163,6 +163,11 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) jmp .Lftrace_ret SYM_CODE_END(ftrace_regs_caller) +SYM_FUNC_START(ftrace_stub_direct_tramp) + CALL_DEPTH_ACCOUNT + RET +SYM_FUNC_END(ftrace_stub_direct_tramp) + #ifdef CONFIG_FUNCTION_GRAPH_TRACER SYM_CODE_START(ftrace_graph_caller) pushl %eax diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index fb4f1e01b64a..b8c720b5dab2 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -309,6 +309,10 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) SYM_FUNC_END(ftrace_regs_caller) STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller) +SYM_FUNC_START(ftrace_stub_direct_tramp) + CALL_DEPTH_ACCOUNT + RET +SYM_FUNC_END(ftrace_stub_direct_tramp) #else /* ! CONFIG_DYNAMIC_FTRACE */ @@ -342,7 +346,7 @@ STACK_FRAME_NON_STANDARD_FP(__fentry__) #ifdef CONFIG_FUNCTION_GRAPH_TRACER SYM_CODE_START(return_to_handler) - UNWIND_HINT_EMPTY + UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR subq $16, %rsp diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index ec6fefbfd3c0..10c27b4261eb 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -29,7 +29,7 @@ static void __init i386_default_early_setup(void) x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; } -asmlinkage __visible void __init i386_start_kernel(void) +asmlinkage __visible void __init __noreturn i386_start_kernel(void) { /* Make sure IDT is set up before any exception happens */ idt_setup_early_handler(); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 387e4b12e823..49f7629b17f7 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -471,7 +471,7 @@ static void __init copy_bootdata(char *real_mode_data) sme_unmap_bootdata(real_mode_data); } -asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) +asmlinkage __visible void __init __noreturn x86_64_start_kernel(char * real_mode_data) { /* * Build-time sanity checks on the kernel image and module @@ -537,7 +537,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) x86_64_start_reservations(real_mode_data); } -void __init x86_64_start_reservations(char *real_mode_data) +void __init __noreturn x86_64_start_reservations(char *real_mode_data) { /* version is always not zero if it is copied */ if (!boot_params.hdr.version) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 222efd4a09bc..a5df3e994f04 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -42,7 +42,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map) __HEAD .code64 SYM_CODE_START_NOALIGN(startup_64) - UNWIND_HINT_EMPTY + 
UNWIND_HINT_END_OF_STACK /* * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, * and someone has loaded an identity mapped page table @@ -61,23 +61,15 @@ SYM_CODE_START_NOALIGN(startup_64) * tables and then reload them. */ - /* Set up the stack for verify_cpu(), similar to initial_stack below */ - leaq (__end_init_task - FRAME_SIZE)(%rip), %rsp + /* Set up the stack for verify_cpu() */ + leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp leaq _text(%rip), %rdi - /* - * initial_gs points to initial fixed_percpu_data struct with storage for - * the stack protector canary. Global pointer fixups are needed at this - * stage, so apply them as is done in fixup_pointer(), and initialize %gs - * such that the canary can be accessed at %gs:40 for subsequent C calls. - */ + /* Setup GSBASE to allow stack canary access for C code */ movl $MSR_GS_BASE, %ecx - movq initial_gs(%rip), %rax - movq $_text, %rdx - subq %rdx, %rax - addq %rdi, %rax - movq %rax, %rdx + leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx + movl %edx, %eax shrq $32, %rdx wrmsr @@ -105,7 +97,7 @@ SYM_CODE_START_NOALIGN(startup_64) lretq .Lon_kernel_cs: - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK /* Sanitize CPU configuration */ call verify_cpu @@ -127,7 +119,7 @@ SYM_CODE_START_NOALIGN(startup_64) SYM_CODE_END(startup_64) SYM_CODE_START(secondary_startup_64) - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK ANNOTATE_NOENDBR /* * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, @@ -156,7 +148,7 @@ SYM_CODE_START(secondary_startup_64) * verify_cpu() above to make sure NX is enabled. */ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK ANNOTATE_NOENDBR /* @@ -238,16 +230,39 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) ANNOTATE_RETPOLINE_SAFE jmp *%rax 1: - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK ANNOTATE_NOENDBR // above +#ifdef CONFIG_SMP + movl smpboot_control(%rip), %ecx + + /* Get the per cpu offset for the given CPU# which is in ECX */ + movq __per_cpu_offset(,%rcx,8), %rdx +#else + xorl %edx, %edx /* zero-extended to clear all of RDX */ +#endif /* CONFIG_SMP */ + + /* + * Setup a boot time stack - Any secondary CPU will have lost its stack + * by now because the cr3-switch above unmaps the real-mode stack. + * + * RDX contains the per-cpu offset + */ + movq pcpu_hot + X86_current_task(%rdx), %rax + movq TASK_threadsp(%rax), %rsp + /* * We must switch to a new descriptor in kernel space for the GDT * because soon the kernel won't have access anymore to the userspace * addresses where we're currently running on. We have to do that here * because in 32bit we couldn't load a 64bit linear address. */ - lgdt early_gdt_descr(%rip) + subq $16, %rsp + movw $(GDT_SIZE-1), (%rsp) + leaq gdt_page(%rdx), %rax + movq %rax, 2(%rsp) + lgdt (%rsp) + addq $16, %rsp /* set up data segments */ xorl %eax,%eax @@ -271,16 +286,13 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) * the per cpu areas are set up. 
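[Editor's note] In the secondary_startup_64 hunk above, the lgdt descriptor is no longer the static early_gdt_descr but is assembled on the stack: a 16-bit limit at (%rsp) and the per-CPU gdt_page address at 2(%rsp). The 10-byte pseudo-descriptor that lgdt consumes in long mode, expressed as a C structure (the field values below are placeholders, not kernel addresses):

/* Long-mode GDT pseudo-descriptor layout: 16-bit limit immediately followed
 * by a 64-bit linear base. The asm above builds exactly this on the stack,
 * with the base pointing at the booting CPU's own per-CPU gdt_page.
 */
#include <stdio.h>
#include <stdint.h>

struct gdt_ptr {
	uint16_t limit;		/* size of the GDT in bytes, minus one */
	uint64_t base;		/* linear address of the GDT */
} __attribute__((packed));

int main(void)
{
	struct gdt_ptr gdtr = {
		.limit = 16 * 8 - 1,		/* e.g. GDT_ENTRIES * 8 - 1 */
		.base  = 0xffff888000001000ULL,	/* placeholder gdt_page address */
	};

	printf("sizeof(struct gdt_ptr) = %zu (must be 10)\n", sizeof(gdtr));
	printf("limit=%#x base=%#llx\n", gdtr.limit,
	       (unsigned long long)gdtr.base);
	return 0;
}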
*/ movl $MSR_GS_BASE,%ecx - movl initial_gs(%rip),%eax - movl initial_gs+4(%rip),%edx +#ifndef CONFIG_SMP + leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx +#endif + movl %edx, %eax + shrq $32, %rdx wrmsr - /* - * Setup a boot time stack - Any secondary CPU will have lost its stack - * by now because the cr3-switch above unmaps the real-mode stack - */ - movq initial_stack(%rip), %rsp - /* Setup and Load IDT */ pushq %rsi call early_setup_idt @@ -371,8 +383,12 @@ SYM_CODE_END(secondary_startup_64) */ SYM_CODE_START(start_cpu0) ANNOTATE_NOENDBR - UNWIND_HINT_EMPTY - movq initial_stack(%rip), %rsp + UNWIND_HINT_END_OF_STACK + + /* Find the idle task stack */ + movq PER_CPU_VAR(pcpu_hot) + X86_current_task, %rcx + movq TASK_threadsp(%rcx), %rsp + jmp .Ljump_to_C_code SYM_CODE_END(start_cpu0) #endif @@ -390,8 +406,6 @@ SYM_CODE_START_NOALIGN(vc_boot_ghcb) UNWIND_HINT_IRET_REGS offset=8 ENDBR - ANNOTATE_UNRET_END - /* Build pt_regs */ PUSH_AND_CLEAR_REGS @@ -416,16 +430,9 @@ SYM_CODE_END(vc_boot_ghcb) __REFDATA .balign 8 SYM_DATA(initial_code, .quad x86_64_start_kernel) -SYM_DATA(initial_gs, .quad INIT_PER_CPU_VAR(fixed_percpu_data)) #ifdef CONFIG_AMD_MEM_ENCRYPT SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb) #endif - -/* - * The FRAME_SIZE gap is a convention which helps the in-kernel unwinder - * reliably detect the end of the stack. - */ -SYM_DATA(initial_stack, .quad init_thread_union + THREAD_SIZE - FRAME_SIZE) __FINITDATA __INIT @@ -451,7 +458,6 @@ SYM_CODE_END(early_idt_handler_array) SYM_CODE_START_LOCAL(early_idt_handler_common) UNWIND_HINT_IRET_REGS offset=16 - ANNOTATE_UNRET_END /* * The stack is the hardware frame, an error code or zero, and the * vector number. @@ -501,8 +507,6 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb) UNWIND_HINT_IRET_REGS offset=8 ENDBR - ANNOTATE_UNRET_END - /* Build pt_regs */ PUSH_AND_CLEAR_REGS @@ -657,8 +661,7 @@ SYM_DATA_END(level1_fixmap_pgt) .data .align 16 -SYM_DATA(early_gdt_descr, .word GDT_ENTRIES*8-1) -SYM_DATA_LOCAL(early_gdt_descr_base, .quad INIT_PER_CPU_VAR(gdt_page)) +SYM_DATA(smpboot_control, .long 0) .align 16 /* This must match the first entry in level2_kernel_pgt */ diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index 9ff480e94511..670eb08b972a 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -77,15 +77,6 @@ static struct ctl_table itmt_kern_table[] = { {} }; -static struct ctl_table itmt_root_table[] = { - { - .procname = "kernel", - .mode = 0555, - .child = itmt_kern_table, - }, - {} -}; - static struct ctl_table_header *itmt_sysctl_header; /** @@ -114,7 +105,7 @@ int sched_set_itmt_support(void) return 0; } - itmt_sysctl_header = register_sysctl_table(itmt_root_table); + itmt_sysctl_header = register_sysctl("kernel", itmt_kern_table); if (!itmt_sysctl_header) { mutex_unlock(&itmt_update_mutex); return -ENOMEM; diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 6b58610a1552..a61c12c01270 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -476,7 +476,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel, efi_map_offset = params_cmdline_sz; efi_setup_data_offset = efi_map_offset + ALIGN(efi_map_sz, 16); - /* Copy setup header onto bootparams. Documentation/x86/boot.rst */ + /* Copy setup header onto bootparams. Documentation/arch/x86/boot.rst */ setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset; /* Is there a limit on setup header size? 
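[Editor's note] The kexec-bzimage64.c comment above points at the boot protocol documentation; the expression 0x0202 + kernel[0x0201] - setup_hdr_offset works because the byte at offset 0x0201 of a bzImage is the displacement of the short jump at 0x0200, which lands just past the end of the setup header. A stand-alone reader of that field; the 0x1f1 header offset is hard-coded here as an assumption (it is offsetof(struct boot_params, hdr) in the kernel):

/* Compute the setup header length of a bzImage the same way the hunk above
 * does: header starts at 0x1f1, ends at 0x202 + byte[0x201].
 */
#include <stdio.h>

int main(int argc, char **argv)
{
	const long setup_hdr_offset = 0x1f1;	/* assumed boot_params.hdr offset */
	unsigned char jump_disp;
	FILE *f;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <bzImage>\n", argv[0]);
		return 1;
	}

	f = fopen(argv[1], "rb");
	if (!f || fseek(f, 0x201, SEEK_SET) || fread(&jump_disp, 1, 1, f) != 1) {
		perror("read bzImage");
		return 1;
	}
	fclose(f);

	printf("setup header: offset %#lx, length %ld bytes\n",
	       setup_hdr_offset, 0x202 + jump_disp - setup_hdr_offset);
	return 0;
}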
*/ diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 0611fd83858e..1a3e2c05a8a5 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -374,17 +374,6 @@ void machine_kexec(struct kimage *image) /* arch-dependent functionality related to kexec file-based syscall */ #ifdef CONFIG_KEXEC_FILE -void *arch_kexec_kernel_image_load(struct kimage *image) -{ - if (!image->fops || !image->fops->load) - return ERR_PTR(-ENOEXEC); - - return image->fops->load(image, image->kernel_buf, - image->kernel_buf_len, image->initrd_buf, - image->initrd_buf_len, image->cmdline_buf, - image->cmdline_buf_len); -} - /* * Apply purgatory relocations. * diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 84ad0e61ba6e..b05f62ee2344 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -362,8 +362,8 @@ int module_finalize(const Elf_Ehdr *hdr, } if (locks) { void *lseg = (void *)locks->sh_addr; - void *text = me->core_layout.base; - void *text_end = text + me->core_layout.text_size; + void *text = me->mem[MOD_TEXT].base; + void *text_end = text + me->mem[MOD_TEXT].size; alternatives_smp_module_add(me, me->name, lseg, lseg + locks->sh_size, text, text_end); diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 708751311786..7bb17d37db01 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -263,7 +263,7 @@ static int __init msr_init(void) pr_err("unable to get major %d for msr\n", MSR_MAJOR); return -EBUSY; } - msr_class = class_create(THIS_MODULE, "msr"); + msr_class = class_create("msr"); if (IS_ERR(msr_class)) { err = PTR_ERR(msr_class); goto out_chrdev; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 42e182868873..ac10b46c5832 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -64,11 +64,11 @@ static unsigned paravirt_patch_call(void *insn_buff, const void *target, } #ifdef CONFIG_PARAVIRT_XXL -/* identity function, which can be inlined */ -u64 notrace _paravirt_ident_64(u64 x) -{ - return x; -} +DEFINE_PARAVIRT_ASM(_paravirt_ident_64, "mov %rdi, %rax", .text); +DEFINE_PARAVIRT_ASM(pv_native_save_fl, "pushf; pop %rax", .noinstr.text); +DEFINE_PARAVIRT_ASM(pv_native_irq_disable, "cli", .noinstr.text); +DEFINE_PARAVIRT_ASM(pv_native_irq_enable, "sti", .noinstr.text); +DEFINE_PARAVIRT_ASM(pv_native_read_cr2, "mov %cr2, %rax", .noinstr.text); #endif DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key); @@ -197,11 +197,6 @@ void paravirt_end_context_switch(struct task_struct *next) arch_enter_lazy_mmu_mode(); } -static noinstr unsigned long pv_native_read_cr2(void) -{ - return native_read_cr2(); -} - static noinstr void pv_native_write_cr2(unsigned long val) { native_write_cr2(val); @@ -222,16 +217,6 @@ noinstr void pv_native_wbinvd(void) native_wbinvd(); } -static noinstr void pv_native_irq_enable(void) -{ - native_irq_enable(); -} - -static noinstr void pv_native_irq_disable(void) -{ - native_irq_disable(); -} - static noinstr void pv_native_safe_halt(void) { native_safe_halt(); @@ -298,7 +283,7 @@ struct paravirt_patch_template pv_ops = { .cpu.end_context_switch = paravirt_nop, /* Irq ops. 
*/ - .irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), + .irq.save_fl = __PV_IS_CALLEE_SAVE(pv_native_save_fl), .irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable), .irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable), .irq.safe_halt = pv_native_safe_halt, @@ -363,8 +348,7 @@ struct paravirt_patch_template pv_ops = { .mmu.make_pte = PTE_IDENT, .mmu.make_pgd = PTE_IDENT, - .mmu.dup_mmap = paravirt_nop, - .mmu.activate_mm = paravirt_nop, + .mmu.enter_mmap = paravirt_nop, .mmu.lazy_mode = { .enter = paravirt_nop, diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 30bbe4abb5d6..de6be0a3965e 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -124,7 +124,7 @@ void __init pci_iommu_alloc(void) } /* - * See <Documentation/x86/x86_64/boot-options.rst> for the iommu kernel + * See <Documentation/arch/x86/x86_64/boot-options.rst> for the iommu kernel * parameter documentation. */ static __init int iommu_setup(char *p) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index b650cde3f64d..dac41a0072ea 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -5,6 +5,7 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/smp.h> +#include <linux/cpu.h> #include <linux/prctl.h> #include <linux/slab.h> #include <linux/sched.h> @@ -48,6 +49,7 @@ #include <asm/frame.h> #include <asm/unwind.h> #include <asm/tdx.h> +#include <asm/mmu_context.h> #include "process.h" @@ -162,6 +164,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); + + if (p->mm && (clone_flags & (CLONE_VM | CLONE_VFORK)) == CLONE_VM) + set_bit(MM_CONTEXT_LOCK_LAM, &p->mm->context.flags); #else p->thread.sp0 = (unsigned long) (childregs + 1); savesegment(gs, p->thread.gs); @@ -368,6 +373,8 @@ void arch_setup_new_exec(void) task_clear_spec_ssb_noexec(current); speculation_ctrl_update(read_thread_flags()); } + + mm_reset_untag_mask(current->mm); } #ifdef CONFIG_X86_IOPL_IOPERM @@ -715,7 +722,7 @@ static bool x86_idle_set(void) } #ifndef CONFIG_SMP -static inline void play_dead(void) +static inline void __noreturn play_dead(void) { BUG(); } @@ -727,7 +734,7 @@ void arch_cpu_idle_enter(void) local_touch_nmi(); } -void arch_cpu_idle_dead(void) +void __noreturn arch_cpu_idle_dead(void) { play_dead(); } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index bb65a68b4b49..3d181c16a2f6 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -39,6 +39,7 @@ #include <linux/io.h> #include <linux/ftrace.h> #include <linux/syscalls.h> +#include <linux/iommu.h> #include <asm/processor.h> #include <asm/pkru.h> @@ -671,7 +672,7 @@ void set_personality_64bit(void) task_pt_regs(current)->orig_ax = __NR_execve; current_thread_info()->status &= ~TS_COMPAT; if (current->mm) - current->mm->context.flags = MM_CONTEXT_HAS_VSYSCALL; + __set_bit(MM_CONTEXT_HAS_VSYSCALL, ¤t->mm->context.flags); /* TBD: overwrites user setup. Should have two bits. But 64bit processes have always behaved this way, @@ -708,7 +709,7 @@ static void __set_personality_ia32(void) * uprobes applied to this MM need to know this and * cannot use user_64bit_mode() at that time. 
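[Editor's note] set_personality_64bit() above and __set_personality_ia32() just below now treat mm->context.flags as a bit array: the plain assignments are replaced by __set_bit(), so turning on one property no longer wipes out others such as the LAM lock bit added later in this series. The difference in isolation, with illustrative flag values rather than the kernel's:

/* Assignment vs. setting a bit in a shared flags word: the old-style
 * "flags = X" loses whatever was already set.
 */
#include <stdio.h>

#define FLAG_UPROBE_IA32	(1UL << 0)
#define FLAG_HAS_VSYSCALL	(1UL << 1)
#define FLAG_LOCK_LAM		(1UL << 2)

int main(void)
{
	unsigned long flags = FLAG_LOCK_LAM;		/* already set elsewhere */

	unsigned long overwritten = FLAG_UPROBE_IA32;		/* old style */
	unsigned long preserved = flags | FLAG_UPROBE_IA32;	/* set-bit style */

	printf("assignment keeps LOCK_LAM: %s\n",
	       (overwritten & FLAG_LOCK_LAM) ? "yes" : "no");
	printf("set_bit    keeps LOCK_LAM: %s\n",
	       (preserved & FLAG_LOCK_LAM) ? "yes" : "no");
	return 0;
}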
*/ - current->mm->context.flags = MM_CONTEXT_UPROBE_IA32; + __set_bit(MM_CONTEXT_UPROBE_IA32, ¤t->mm->context.flags); } current->personality |= force_personality32; @@ -743,6 +744,52 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr) } #endif +#ifdef CONFIG_ADDRESS_MASKING + +#define LAM_U57_BITS 6 + +static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits) +{ + if (!cpu_feature_enabled(X86_FEATURE_LAM)) + return -ENODEV; + + /* PTRACE_ARCH_PRCTL */ + if (current->mm != mm) + return -EINVAL; + + if (mm_valid_pasid(mm) && + !test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags)) + return -EINVAL; + + if (mmap_write_lock_killable(mm)) + return -EINTR; + + if (test_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags)) { + mmap_write_unlock(mm); + return -EBUSY; + } + + if (!nr_bits) { + mmap_write_unlock(mm); + return -EINVAL; + } else if (nr_bits <= LAM_U57_BITS) { + mm->context.lam_cr3_mask = X86_CR3_LAM_U57; + mm->context.untag_mask = ~GENMASK(62, 57); + } else { + mmap_write_unlock(mm); + return -EINVAL; + } + + write_cr3(__read_cr3() | mm->context.lam_cr3_mask); + set_tlbstate_lam_mode(mm); + set_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags); + + mmap_write_unlock(mm); + + return 0; +} +#endif + long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) { int ret = 0; @@ -830,7 +877,23 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) case ARCH_MAP_VDSO_64: return prctl_map_vdso(&vdso_image_64, arg2); #endif - +#ifdef CONFIG_ADDRESS_MASKING + case ARCH_GET_UNTAG_MASK: + return put_user(task->mm->context.untag_mask, + (unsigned long __user *)arg2); + case ARCH_ENABLE_TAGGED_ADDR: + return prctl_enable_tagged_addr(task->mm, arg2); + case ARCH_FORCE_TAGGED_SVA: + if (current != task) + return -EINVAL; + set_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &task->mm->context.flags); + return 0; + case ARCH_GET_MAX_TAG_BITS: + if (!cpu_feature_enabled(X86_FEATURE_LAM)) + return put_user(0, (unsigned long __user *)arg2); + else + return put_user(LAM_U57_BITS, (unsigned long __user *)arg2); +#endif default: ret = -EINVAL; break; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index d03c551defcc..3adbe97015c1 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -920,7 +920,7 @@ void run_crash_ipi_callback(struct pt_regs *regs) } /* Override the weak function in kernel/panic.c */ -void nmi_panic_self_stop(struct pt_regs *regs) +void __noreturn nmi_panic_self_stop(struct pt_regs *regs) { while (1) { /* If no CPU is preparing crash dump, we simply loop here. 
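[Editor's note] prctl_enable_tagged_addr() above limits LAM to U57 (6 tag bits) and records untag_mask = ~GENMASK(62, 57). The effect on a user pointer can be shown directly; the pointer and tag values below are arbitrary, and GENMASK_ULL() is spelled out as the user-space equivalent of the kernel macro:

/* LAM U57 untagging as configured above: bits 62..57 carry the tag and are
 * ignored for translation, so the kernel strips them with the inverse mask.
 */
#include <stdio.h>
#include <stdint.h>

#define GENMASK_ULL(h, l) \
	(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

int main(void)
{
	uint64_t untag_mask = ~GENMASK_ULL(62, 57);
	uint64_t addr = 0x00007f1234567890ULL;
	uint64_t tag = 0x2a;			/* arbitrary 6-bit tag */
	uint64_t tagged = addr | (tag << 57);

	printf("tagged   %#018llx\n", (unsigned long long)tagged);
	printf("untagged %#018llx\n", (unsigned long long)(tagged & untag_mask));
	return 0;
}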
*/ diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 4a73351f87f8..56cab1bb25f5 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -43,7 +43,7 @@ .code64 SYM_CODE_START_NOALIGN(relocate_range) SYM_CODE_START_NOALIGN(relocate_kernel) - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK ANNOTATE_NOENDBR /* * %rdi indirection_page @@ -113,7 +113,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel) SYM_CODE_END(relocate_kernel) SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK /* set return address to 0 if not preserving context */ pushq $0 /* store the start address on the stack */ @@ -231,7 +231,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK ANNOTATE_NOENDBR // RET target, above movq RSP(%r8), %rsp movq CR4(%r8), %rax @@ -256,8 +256,8 @@ SYM_CODE_END(virtual_mapped) /* Do the copies */ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) - UNWIND_HINT_EMPTY - movq %rdi, %rcx /* Put the page_list in %rcx */ + UNWIND_HINT_END_OF_STACK + movq %rdi, %rcx /* Put the page_list in %rcx */ xorl %edi, %edi xorl %esi, %esi jmp 1f diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 3f664ab277c4..b031244d6d2d 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -22,6 +22,8 @@ #include <linux/efi.h> #include <linux/platform_device.h> #include <linux/io.h> +#include <linux/psp-sev.h> +#include <uapi/linux/sev-guest.h> #include <asm/cpu_entry_area.h> #include <asm/stacktrace.h> @@ -2175,7 +2177,7 @@ static int __init init_sev_config(char *str) } __setup("sev=", init_sev_config); -int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err) +int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio) { struct ghcb_state state; struct es_em_ctxt ctxt; @@ -2183,8 +2185,7 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned struct ghcb *ghcb; int ret; - if (!fw_err) - return -EINVAL; + rio->exitinfo2 = SEV_RET_NO_FW_CALL; /* * __sev_get_ghcb() needs to run with IRQs disabled because it is using @@ -2209,16 +2210,16 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned if (ret) goto e_put; - *fw_err = ghcb->save.sw_exit_info_2; - switch (*fw_err) { + rio->exitinfo2 = ghcb->save.sw_exit_info_2; + switch (rio->exitinfo2) { case 0: break; - case SNP_GUEST_REQ_ERR_BUSY: + case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY): ret = -EAGAIN; break; - case SNP_GUEST_REQ_INVALID_LEN: + case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN): /* Number of expected pages are returned in RBX */ if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) { input->data_npages = ghcb_get_rbx(ghcb); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9013bb28255a..352f0ce1ece4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -121,17 +121,20 @@ int arch_update_cpu_topology(void) return retval; } + +static unsigned int smpboot_warm_reset_vector_count; + static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) { unsigned long flags; spin_lock_irqsave(&rtc_lock, flags); - CMOS_WRITE(0xa, 0xf); + if (!smpboot_warm_reset_vector_count++) { + CMOS_WRITE(0xa, 0xf); + *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = start_eip >> 4; + *((volatile unsigned short 
*)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = start_eip & 0xf; + } spin_unlock_irqrestore(&rtc_lock, flags); - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = - start_eip >> 4; - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = - start_eip & 0xf; } static inline void smpboot_restore_warm_reset_vector(void) @@ -143,10 +146,12 @@ static inline void smpboot_restore_warm_reset_vector(void) * to default values. */ spin_lock_irqsave(&rtc_lock, flags); - CMOS_WRITE(0, 0xf); + if (!--smpboot_warm_reset_vector_count) { + CMOS_WRITE(0, 0xf); + *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; + } spin_unlock_irqrestore(&rtc_lock, flags); - *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; } /* @@ -1059,8 +1064,6 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle) #ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ per_cpu(pcpu_hot.top_of_stack, cpu) = task_top_of_stack(idle); -#else - initial_gs = per_cpu_offset(cpu); #endif return 0; } @@ -1086,9 +1089,14 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, start_ip = real_mode_header->trampoline_start64; #endif idle->thread.sp = (unsigned long)task_pt_regs(idle); - early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu); initial_code = (unsigned long)start_secondary; - initial_stack = idle->thread.sp; + + if (IS_ENABLED(CONFIG_X86_32)) { + early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu); + initial_stack = idle->thread.sp; + } else { + smpboot_control = cpu; + } /* Enable the espfix hack for this CPU */ init_espfix_ap(cpu); @@ -1816,7 +1824,7 @@ static inline void mwait_play_dead(void) } } -void hlt_play_dead(void) +void __noreturn hlt_play_dead(void) { if (__this_cpu_read(cpu_info.x86) >= 4) wbinvd(); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d317dc3d06a3..58b1f208eff5 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -40,7 +40,7 @@ #include <linux/io.h> #include <linux/hardirq.h> #include <linux/atomic.h> -#include <linux/ioasid.h> +#include <linux/iommu.h> #include <asm/stacktrace.h> #include <asm/processor.h> @@ -671,15 +671,15 @@ static bool try_fixup_enqcmd_gp(void) if (!cpu_feature_enabled(X86_FEATURE_ENQCMD)) return false; - pasid = current->mm->pasid; - /* * If the mm has not been allocated a * PASID, the #GP can not be fixed up. */ - if (!pasid_valid(pasid)) + if (!mm_valid_pasid(current->mm)) return false; + pasid = current->mm->pasid; + /* * Did this thread already have its PASID activated? * If so, the #GP must be from something else. 
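[Editor's note] The smpboot.c hunk above makes warm-reset-vector setup and teardown reference counted, so with several APs in flight only the first caller programs the CMOS byte and trampoline words and only the last one clears them. The guard pattern in isolation (the printf calls stand in for the CMOS/trampoline writes; in the kernel the counter is serialized under rtc_lock, omitted here):

/* Refcounted setup/teardown: the side effect happens only on the 0 -> 1 and
 * 1 -> 0 transitions of the use count.
 */
#include <stdio.h>

static unsigned int warm_reset_vector_count;

static void vector_setup(void)
{
	if (!warm_reset_vector_count++)
		printf("programming warm reset vector (first user)\n");
}

static void vector_restore(void)
{
	if (!--warm_reset_vector_count)
		printf("clearing warm reset vector (last user)\n");
}

int main(void)
{
	vector_setup();		/* first AP: programs the vector */
	vector_setup();		/* second AP: no-op */
	vector_restore();	/* still in use: no-op */
	vector_restore();	/* last user: clears it */
	return 0;
}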
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 37307b40f8da..3ac50b7298d1 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -133,7 +133,7 @@ static struct orc_entry null_orc_entry = { .sp_offset = sizeof(long), .sp_reg = ORC_REG_SP, .bp_reg = ORC_REG_UNDEFINED, - .type = UNWIND_HINT_TYPE_CALL + .type = ORC_TYPE_CALL }; #ifdef CONFIG_CALL_THUNKS @@ -153,12 +153,11 @@ static struct orc_entry *orc_callthunk_find(unsigned long ip) /* Fake frame pointer entry -- used as a fallback for generated code */ static struct orc_entry orc_fp_entry = { - .type = UNWIND_HINT_TYPE_CALL, + .type = ORC_TYPE_CALL, .sp_reg = ORC_REG_BP, .sp_offset = 16, .bp_reg = ORC_REG_PREV_SP, .bp_offset = -16, - .end = 0, }; static struct orc_entry *orc_find(unsigned long ip) @@ -250,13 +249,13 @@ static int orc_sort_cmp(const void *_a, const void *_b) return -1; /* - * The "weak" section terminator entries need to always be on the left + * The "weak" section terminator entries need to always be first * to ensure the lookup code skips them in favor of real entries. * These terminator entries exist to handle any gaps created by * whitelisted .o files which didn't get objtool generation. */ orc_a = cur_orc_table + (a - cur_orc_ip_table); - return orc_a->sp_reg == ORC_REG_UNDEFINED && !orc_a->end ? -1 : 1; + return orc_a->type == ORC_TYPE_UNDEFINED ? -1 : 1; } void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size, @@ -474,14 +473,12 @@ bool unwind_next_frame(struct unwind_state *state) */ orc = &orc_fp_entry; state->error = true; - } - - /* End-of-stack check for kernel threads: */ - if (orc->sp_reg == ORC_REG_UNDEFINED) { - if (!orc->end) + } else { + if (orc->type == ORC_TYPE_UNDEFINED) goto err; - goto the_end; + if (orc->type == ORC_TYPE_END_OF_STACK) + goto the_end; } state->signal = orc->signal; @@ -554,7 +551,7 @@ bool unwind_next_frame(struct unwind_state *state) /* Find IP, SP and possibly regs: */ switch (orc->type) { - case UNWIND_HINT_TYPE_CALL: + case ORC_TYPE_CALL: ip_p = sp - sizeof(long); if (!deref_stack_reg(state, ip_p, &state->ip)) @@ -567,7 +564,7 @@ bool unwind_next_frame(struct unwind_state *state) state->prev_regs = NULL; break; - case UNWIND_HINT_TYPE_REGS: + case ORC_TYPE_REGS: if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) { orc_warn_current("can't access registers at %pB\n", (void *)orig_ip); @@ -590,13 +587,13 @@ bool unwind_next_frame(struct unwind_state *state) state->full_regs = true; break; - case UNWIND_HINT_TYPE_REGS_PARTIAL: + case ORC_TYPE_REGS_PARTIAL: if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) { orc_warn_current("can't access iret registers at %pB\n", (void *)orig_ip); goto err; } - /* See UNWIND_HINT_TYPE_REGS case comment. */ + /* See ORC_TYPE_REGS case comment. 
*/ state->ip = unwind_recover_rethook(state, state->ip, (unsigned long *)(state->sp - sizeof(long))); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index ef80d361b463..d82f4fa2f1bf 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -33,8 +33,8 @@ static int __init iommu_init_noop(void) { return 0; } static void iommu_shutdown_noop(void) { } bool __init bool_x86_init_noop(void) { return false; } void x86_op_int_noop(int cpu) { } -static __init int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; } -static __init void get_rtc_noop(struct timespec64 *now) { } +int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; } +void get_rtc_noop(struct timespec64 *now) { } static __initconst const struct of_device_id of_cmos_match[] = { { .compatible = "motorola,mc146818" }, @@ -134,6 +134,7 @@ static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; } static bool enc_tlb_flush_required_noop(bool enc) { return false; } static bool enc_cache_flush_required_noop(void) { return false; } +static bool is_private_mmio_noop(u64 addr) {return false; } struct x86_platform_ops x86_platform __ro_after_init = { .calibrate_cpu = native_calibrate_cpu_early, @@ -149,6 +150,7 @@ struct x86_platform_ops x86_platform __ro_after_init = { .realmode_reserve = reserve_real_mode, .realmode_init = init_real_mode, .hyper.pin_vcpu = x86_op_int_noop, + .hyper.is_private_mmio = is_private_mmio_noop, .guest = { .enc_status_change_prepare = enc_status_change_prepare_noop, |
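[Editor's note] The x86_init.c hunk rounds out the series: is_private_mmio gets a noop default returning false, and io_apic_set_fixmap() earlier in this diff consults it so that a platform (Hyper-V vTOM here) can override the decision. The general shape of that pattern, sketched with invented names rather than the kernel's x86_platform structure:

/* Ops struct populated with a safe default that platform code may replace
 * at init time. guest_ops, hyperv_is_private_mmio and the address rule are
 * all made up for this sketch.
 */
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

struct guest_ops {
	bool (*is_private_mmio)(uint64_t addr);
};

static bool is_private_mmio_noop(uint64_t addr) { return false; }

static struct guest_ops platform = {
	.is_private_mmio = is_private_mmio_noop,	/* safe default */
};

/* A platform init path can install its own policy. */
static bool hyperv_is_private_mmio(uint64_t addr)
{
	return addr >= 0x100000000ULL;	/* made-up "above the boundary" rule */
}

int main(void)
{
	uint64_t addr = 0x1fee00000ULL;

	printf("default: %d\n", platform.is_private_mmio(addr));
	platform.is_private_mmio = hyperv_is_private_mmio;
	printf("override: %d\n", platform.is_private_mmio(addr));
	return 0;
}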