diff options
Diffstat (limited to 'arch/x86')
30 files changed, 180 insertions, 54 deletions
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index b39975977c03..fdc2e3abd615 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -305,6 +305,18 @@ ifeq ($(RETPOLINE_CFLAGS),) endif endif +ifdef CONFIG_UNWINDER_ORC +orc_hash_h := arch/$(SRCARCH)/include/generated/asm/orc_hash.h +orc_hash_sh := $(srctree)/scripts/orc_hash.sh +targets += $(orc_hash_h) +quiet_cmd_orc_hash = GEN $@ + cmd_orc_hash = mkdir -p $(dir $@); \ + $(CONFIG_SHELL) $(orc_hash_sh) < $< > $@ +$(orc_hash_h): $(srctree)/arch/x86/include/asm/orc_types.h $(orc_hash_sh) FORCE + $(call if_changed,orc_hash) +archprepare: $(orc_hash_h) +endif + archclean: $(Q)rm -rf $(objtree)/arch/i386 $(Q)rm -rf $(objtree)/arch/x86_64 diff --git a/arch/x86/crypto/aria-aesni-avx-asm_64.S b/arch/x86/crypto/aria-aesni-avx-asm_64.S index 7c1abc513f34..9556dacd9841 100644 --- a/arch/x86/crypto/aria-aesni-avx-asm_64.S +++ b/arch/x86/crypto/aria-aesni-avx-asm_64.S @@ -773,8 +773,6 @@ .octa 0x3F893781E95FE1576CDA64D2BA0CB204 #ifdef CONFIG_AS_GFNI -.section .rodata.cst8, "aM", @progbits, 8 -.align 8 /* AES affine: */ #define tf_aff_const BV8(1, 1, 0, 0, 0, 1, 1, 0) .Ltf_aff_bitmatrix: diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 070cc4ef2672..27f3a7b34bd5 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -349,6 +349,16 @@ static struct event_constraint intel_spr_event_constraints[] = { EVENT_CONSTRAINT_END }; +static struct extra_reg intel_gnr_extra_regs[] __read_mostly = { + INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + INTEL_UEVENT_EXTRA_REG(0x02c6, MSR_PEBS_FRONTEND, 0x9, FE), + INTEL_UEVENT_EXTRA_REG(0x03c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), + INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE), + INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), + EVENT_EXTRA_END +}; EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); @@ -4074,7 +4084,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) if (x86_pmu.intel_cap.pebs_baseline) { arr[(*nr)++] = (struct perf_guest_switch_msr){ .msr = MSR_PEBS_DATA_CFG, - .host = cpuc->pebs_data_cfg, + .host = cpuc->active_pebs_data_cfg, .guest = kvm_pmu->pebs_data_cfg, }; } @@ -6496,6 +6506,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_SAPPHIRERAPIDS_X: case INTEL_FAM6_EMERALDRAPIDS_X: x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; + x86_pmu.extra_regs = intel_spr_extra_regs; fallthrough; case INTEL_FAM6_GRANITERAPIDS_X: case INTEL_FAM6_GRANITERAPIDS_D: @@ -6506,7 +6517,8 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_spr_event_constraints; x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints; - x86_pmu.extra_regs = intel_spr_extra_regs; + if (!x86_pmu.extra_regs) + x86_pmu.extra_regs = intel_gnr_extra_regs; x86_pmu.limit_period = spr_limit_period; x86_pmu.pebs_ept = 1; x86_pmu.pebs_aliases = NULL; @@ -6650,6 +6662,7 @@ __init int intel_pmu_init(void) pmu->pebs_constraints = intel_grt_pebs_event_constraints; pmu->extra_regs = intel_grt_extra_regs; if (is_mtl(boot_cpu_data.x86_model)) { + x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_gnr_extra_regs; x86_pmu.pebs_latency_data = mtl_latency_data_small; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index fa9b209a11fa..d49e90dc04a4 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -6150,6 +6150,7 @@ static struct intel_uncore_type spr_uncore_mdf = { }; #define UNCORE_SPR_NUM_UNCORE_TYPES 12 +#define UNCORE_SPR_CHA 0 #define UNCORE_SPR_IIO 1 #define UNCORE_SPR_IMC 6 #define UNCORE_SPR_UPI 8 @@ -6460,12 +6461,22 @@ static int uncore_type_max_boxes(struct intel_uncore_type **types, return max + 1; } +#define SPR_MSR_UNC_CBO_CONFIG 0x2FFE + void spr_uncore_cpu_init(void) { + struct intel_uncore_type *type; + u64 num_cbo; + uncore_msr_uncores = uncore_get_uncores(UNCORE_ACCESS_MSR, UNCORE_SPR_MSR_EXTRA_UNCORES, spr_msr_uncores); + type = uncore_find_type_by_id(uncore_msr_uncores, UNCORE_SPR_CHA); + if (type) { + rdmsrl(SPR_MSR_UNC_CBO_CONFIG, num_cbo); + type->num_boxes = num_cbo; + } spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO); } diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index a5f9474f08e1..6c04b52f139b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -416,7 +416,7 @@ void __init hyperv_init(void) goto free_vp_assist_page; } - cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", + cpuhp = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "x86/hyperv_init:online", hv_cpu_init, hv_cpu_die); if (cpuhp < 0) goto free_ghcb_page; diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 1ba5d3b99b16..85d38b9f3586 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -20,6 +20,8 @@ void __init hv_vtl_init_platform(void) { pr_info("Linux runs in Hyper-V Virtual Trust Level\n"); + x86_platform.realmode_reserve = x86_init_noop; + x86_platform.realmode_init = x86_init_noop; x86_init.irqs.pre_vector_init = x86_init_noop; x86_init.timers.timer_init = x86_init_noop; diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 1e51650b79d7..4f1ce5fc4e19 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 +generated-y += orc_hash.h generated-y += syscalls_32.h generated-y += syscalls_64.h generated-y += syscalls_x32.h diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h index c2d6cd78ed0c..78fcde7b1f07 100644 --- a/arch/x86/include/asm/fpu/sched.h +++ b/arch/x86/include/asm/fpu/sched.h @@ -39,7 +39,7 @@ extern void fpu_flush_thread(void); static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { if (cpu_feature_enabled(X86_FEATURE_FPU) && - !(current->flags & (PF_KTHREAD | PF_IO_WORKER))) { + !(current->flags & (PF_KTHREAD | PF_USER_WORKER))) { save_fpregs_to_fpstate(old_fpu); /* * The save operation preserved register state, so the diff --git a/arch/x86/include/asm/orc_header.h b/arch/x86/include/asm/orc_header.h new file mode 100644 index 000000000000..07bacf3e160e --- /dev/null +++ b/arch/x86/include/asm/orc_header.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (c) Meta Platforms, Inc. and affiliates. */ + +#ifndef _ORC_HEADER_H +#define _ORC_HEADER_H + +#include <linux/types.h> +#include <linux/compiler.h> +#include <asm/orc_hash.h> + +/* + * The header is currently a 20-byte hash of the ORC entry definition; see + * scripts/orc_hash.sh. + */ +#define ORC_HEADER \ + __used __section(".orc_header") __aligned(4) \ + static const u8 orc_header[] = { ORC_HASH } + +#endif /* _ORC_HEADER_H */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 498dc600bd5c..0d02c4aafa6f 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -13,7 +13,9 @@ #include <linux/bitops.h> +#include <linux/bug.h> #include <linux/types.h> + #include <uapi/asm/vmx.h> #include <asm/vmxfeatures.h> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index dd61752f4c96..4070a01c11b7 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -17,6 +17,7 @@ CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg CFLAGS_REMOVE_head64.o = -pg CFLAGS_REMOVE_sev.o = -pg +CFLAGS_REMOVE_rethook.o = -pg endif KASAN_SANITIZE_head$(BITS).o := n diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 6bde05a86b4e..896bc41cb2ba 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -97,7 +97,10 @@ static void init_x2apic_ldr(void) static int x2apic_phys_probe(void) { - if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys())) + if (!x2apic_mode) + return 0; + + if (x2apic_phys || x2apic_fadt_phys()) return 1; return apic == &apic_x2apic_phys; diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 5e868b62a7c4..0270925fe013 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -79,7 +79,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c) * initial apic id, which also represents 32-bit extended x2apic id. */ c->initial_apicid = edx; - smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); + smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx)); #endif return 0; } @@ -109,7 +109,8 @@ int detect_extended_topology(struct cpuinfo_x86 *c) */ cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); c->initial_apicid = edx; - core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); + core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); + smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx)); core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); pkg_mask_width = die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 0bf6779187dd..f18ca44c904b 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -195,7 +195,6 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, printk("%sCall Trace:\n", log_lvl); unwind_start(&state, task, regs, stack); - stack = stack ? : get_stack_pointer(task, regs); regs = unwind_get_entry_regs(&state, &partial); /* @@ -214,9 +213,13 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, * - hardirq stack * - entry stack */ - for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { + for (stack = stack ?: get_stack_pointer(task, regs); + stack; + stack = stack_info.next_sp) { const char *stack_name; + stack = PTR_ALIGN(stack, sizeof(long)); + if (get_stack_info(stack, task, &stack_info, &visit_mask)) { /* * We weren't on a valid stack. It's possible that diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h index 9fcfa5c4dad7..af5cbdd9bd29 100644 --- a/arch/x86/kernel/fpu/context.h +++ b/arch/x86/kernel/fpu/context.h @@ -57,7 +57,7 @@ static inline void fpregs_restore_userregs(void) struct fpu *fpu = ¤t->thread.fpu; int cpu = smp_processor_id(); - if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_IO_WORKER))) + if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_USER_WORKER))) return; if (!fpregs_state_valid(fpu, cpu)) { diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index caf33486dc5e..1015af1ae562 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -426,7 +426,7 @@ void kernel_fpu_begin_mask(unsigned int kfpu_mask) this_cpu_write(in_kernel_fpu, true); - if (!(current->flags & (PF_KTHREAD | PF_IO_WORKER)) && + if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER)) && !test_thread_flag(TIF_NEED_FPU_LOAD)) { set_thread_flag(TIF_NEED_FPU_LOAD); save_fpregs_to_fpstate(¤t->thread.fpu); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index a5df3e994f04..113c13376e51 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -77,6 +77,15 @@ SYM_CODE_START_NOALIGN(startup_64) call startup_64_setup_env popq %rsi + /* Now switch to __KERNEL_CS so IRET works reliably */ + pushq $__KERNEL_CS + leaq .Lon_kernel_cs(%rip), %rax + pushq %rax + lretq + +.Lon_kernel_cs: + UNWIND_HINT_END_OF_STACK + #ifdef CONFIG_AMD_MEM_ENCRYPT /* * Activate SEV/SME memory encryption if supported/enabled. This needs to @@ -90,15 +99,6 @@ SYM_CODE_START_NOALIGN(startup_64) popq %rsi #endif - /* Now switch to __KERNEL_CS so IRET works reliably */ - pushq $__KERNEL_CS - leaq .Lon_kernel_cs(%rip), %rax - pushq %rax - lretq - -.Lon_kernel_cs: - UNWIND_HINT_END_OF_STACK - /* Sanitize CPU configuration */ call verify_cpu diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 3ac50b7298d1..4d8e518365f4 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -7,6 +7,9 @@ #include <asm/unwind.h> #include <asm/orc_types.h> #include <asm/orc_lookup.h> +#include <asm/orc_header.h> + +ORC_HEADER; #define orc_warn(fmt, ...) \ printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 123bf8b97a4b..0c9660a07b23 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -253,7 +253,6 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e int nent) { struct kvm_cpuid_entry2 *best; - u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent); best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT); if (best) { @@ -292,21 +291,6 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT); } - - /* - * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate - * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's - * requested XCR0 value. The enclave's XFRM must be a subset of XCRO - * at the time of EENTER, thus adjust the allowed XFRM by the guest's - * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to - * '1' even on CPUs that don't support XSAVE. - */ - best = cpuid_entry2_find(entries, nent, 0x12, 0x1); - if (best) { - best->ecx &= guest_supported_xcr0 & 0xffffffff; - best->edx &= guest_supported_xcr0 >> 32; - best->ecx |= XFEATURE_MASK_FPSSE; - } } void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e542cf285b51..3c300a196bdf 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -229,6 +229,23 @@ static int kvm_recalculate_phys_map(struct kvm_apic_map *new, u32 physical_id; /* + * For simplicity, KVM always allocates enough space for all possible + * xAPIC IDs. Yell, but don't kill the VM, as KVM can continue on + * without the optimized map. + */ + if (WARN_ON_ONCE(xapic_id > new->max_apic_id)) + return -EINVAL; + + /* + * Bail if a vCPU was added and/or enabled its APIC between allocating + * the map and doing the actual calculations for the map. Note, KVM + * hardcodes the x2APIC ID to vcpu_id, i.e. there's no TOCTOU bug if + * the compiler decides to reload x2apic_id after this check. + */ + if (x2apic_id > new->max_apic_id) + return -E2BIG; + + /* * Deliberately truncate the vCPU ID when detecting a mismatched APIC * ID to avoid false positives if the vCPU ID, i.e. x2APIC ID, is a * 32-bit value. Any unwanted aliasing due to truncation results will @@ -253,8 +270,7 @@ static int kvm_recalculate_phys_map(struct kvm_apic_map *new, */ if (vcpu->kvm->arch.x2apic_format) { /* See also kvm_apic_match_physical_addr(). */ - if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) && - x2apic_id <= new->max_apic_id) + if (apic_x2apic_mode(apic) || x2apic_id > 0xff) new->phys_map[x2apic_id] = apic; if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id]) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index c8961f45e3b1..6eaa3d6994ae 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -7091,7 +7091,10 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm) */ slot = NULL; if (atomic_read(&kvm->nr_memslots_dirty_logging)) { - slot = gfn_to_memslot(kvm, sp->gfn); + struct kvm_memslots *slots; + + slots = kvm_memslots_for_spte_role(kvm, sp->role); + slot = __gfn_to_memslot(slots, sp->gfn); WARN_ON_ONCE(!slot); } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index ca32389f3c36..54089f990c8f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3510,7 +3510,7 @@ static bool svm_is_vnmi_pending(struct kvm_vcpu *vcpu) if (!is_vnmi_enabled(svm)) return false; - return !!(svm->vmcb->control.int_ctl & V_NMI_BLOCKING_MASK); + return !!(svm->vmcb->control.int_ctl & V_NMI_PENDING_MASK); } static bool svm_set_vnmi_pending(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c index 0574030b071f..2261b684a7d4 100644 --- a/arch/x86/kvm/vmx/sgx.c +++ b/arch/x86/kvm/vmx/sgx.c @@ -170,12 +170,19 @@ static int __handle_encls_ecreate(struct kvm_vcpu *vcpu, return 1; } - /* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. */ + /* + * Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. Note + * that the allowed XFRM (XFeature Request Mask) isn't strictly bound + * by the supported XCR0. FP+SSE *must* be set in XFRM, even if XSAVE + * is unsupported, i.e. even if XCR0 itself is completely unsupported. + */ if ((u32)miscselect & ~sgx_12_0->ebx || (u32)attributes & ~sgx_12_1->eax || (u32)(attributes >> 32) & ~sgx_12_1->ebx || (u32)xfrm & ~sgx_12_1->ecx || - (u32)(xfrm >> 32) & ~sgx_12_1->edx) { + (u32)(xfrm >> 32) & ~sgx_12_1->edx || + xfrm & ~(vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE) || + (xfrm & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { kvm_inject_gp(vcpu, 0); return 1; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ceb7c5e9cf9e..04b57a336b34 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1446,7 +1446,7 @@ static const u32 msrs_to_save_base[] = { #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, - MSR_IA32_SPEC_CTRL, + MSR_IA32_SPEC_CTRL, MSR_IA32_TSX_CTRL, MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH, MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK, MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B, @@ -7155,6 +7155,10 @@ static void kvm_probe_msr_to_save(u32 msr_index) if (!kvm_cpu_cap_has(X86_FEATURE_XFD)) return; break; + case MSR_IA32_TSX_CTRL: + if (!(kvm_get_arch_capabilities() & ARCH_CAP_TSX_CTRL_MSR)) + return; + break; default: break; } @@ -10754,6 +10758,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED; break; } + + /* Note, VM-Exits that go down the "slow" path are accounted below. */ + ++vcpu->stat.exits; } /* diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 4fc5c2de2de4..01c5de4c279b 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -7,6 +7,8 @@ */ #include <linux/linkage.h> +#include <asm/cpufeatures.h> +#include <asm/alternative.h> #include <asm/asm.h> #include <asm/export.h> @@ -29,7 +31,7 @@ */ SYM_FUNC_START(rep_movs_alternative) cmpq $64,%rcx - jae .Lunrolled + jae .Llarge cmp $8,%ecx jae .Lword @@ -65,6 +67,12 @@ SYM_FUNC_START(rep_movs_alternative) _ASM_EXTABLE_UA( 2b, .Lcopy_user_tail) _ASM_EXTABLE_UA( 3b, .Lcopy_user_tail) +.Llarge: +0: ALTERNATIVE "jmp .Lunrolled", "rep movsb", X86_FEATURE_ERMS +1: RET + + _ASM_EXTABLE_UA( 0b, 1b) + .p2align 4 .Lunrolled: 10: movq (%rsi),%r8 diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 3cdac0f0055d..8192452d1d2d 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -9,6 +9,7 @@ #include <linux/sched/task.h> #include <asm/set_memory.h> +#include <asm/cpu_device_id.h> #include <asm/e820/api.h> #include <asm/init.h> #include <asm/page.h> @@ -261,6 +262,24 @@ static void __init probe_page_size_mask(void) } } +#define INTEL_MATCH(_model) { .vendor = X86_VENDOR_INTEL, \ + .family = 6, \ + .model = _model, \ + } +/* + * INVLPG may not properly flush Global entries + * on these CPUs when PCIDs are enabled. + */ +static const struct x86_cpu_id invlpg_miss_ids[] = { + INTEL_MATCH(INTEL_FAM6_ALDERLAKE ), + INTEL_MATCH(INTEL_FAM6_ALDERLAKE_L ), + INTEL_MATCH(INTEL_FAM6_ALDERLAKE_N ), + INTEL_MATCH(INTEL_FAM6_RAPTORLAKE ), + INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_P), + INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_S), + {} +}; + static void setup_pcid(void) { if (!IS_ENABLED(CONFIG_X86_64)) @@ -269,6 +288,12 @@ static void setup_pcid(void) if (!boot_cpu_has(X86_FEATURE_PCID)) return; + if (x86_match_cpu(invlpg_miss_ids)) { + pr_info("Incomplete global flushes, disabling PCID"); + setup_clear_cpu_cap(X86_FEATURE_PCID); + return; + } + if (boot_cpu_has(X86_FEATURE_PGE)) { /* * This can't be cr4_set_bits_and_update_boot() -- the diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 557f0fe25dff..37db264866b6 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -172,10 +172,10 @@ void __meminit init_trampoline_kaslr(void) set_p4d(p4d_tramp, __p4d(_KERNPG_TABLE | __pa(pud_page_tramp))); - set_pgd(&trampoline_pgd_entry, - __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp))); + trampoline_pgd_entry = + __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp)); } else { - set_pgd(&trampoline_pgd_entry, - __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); + trampoline_pgd_entry = + __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)); } } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 1056bbf55b17..438adb695daa 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -2570,7 +2570,7 @@ out_image: } if (bpf_jit_enable > 1) - bpf_jit_dump(prog->len, proglen, pass + 1, image); + bpf_jit_dump(prog->len, proglen, pass + 1, rw_image); if (image) { if (!prog->is_func || extra_pass) { diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 8babce71915f..014c508e914d 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -198,7 +198,7 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) i++; } kfree(v); - return 0; + return msi_device_populate_sysfs(&dev->dev); error: if (ret == -ENOSYS) @@ -254,7 +254,7 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) dev_dbg(&dev->dev, "xen: msi --> pirq=%d --> irq=%d\n", pirq, irq); } - return 0; + return msi_device_populate_sysfs(&dev->dev); error: dev_err(&dev->dev, "Failed to create MSI%s! ret=%d!\n", @@ -346,7 +346,7 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) if (ret < 0) goto out; } - ret = 0; + ret = msi_device_populate_sysfs(&dev->dev); out: return ret; } @@ -394,6 +394,8 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev) xen_destroy_irq(msidesc->irq + i); msidesc->irq = 0; } + + msi_device_destroy_sysfs(&dev->dev); } static void xen_pv_teardown_msi_irqs(struct pci_dev *dev) diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 82fec66d46d2..42abd6af1198 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -14,6 +14,11 @@ $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE CFLAGS_sha256.o := -D__DISABLE_EXPORTS +# When profile-guided optimization is enabled, llvm emits two different +# overlapping text sections, which is not supported by kexec. Remove profile +# optimization flags. +KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS)) + # When linking purgatory.ro with -r unresolved symbols are not checked, # also link a purgatory.chk binary without -r to check for unresolved symbols. PURGATORY_LDFLAGS := -e purgatory_start -z nodefaultlib |