Diffstat (limited to 'arch/x86/include')
24 files changed, 329 insertions, 167 deletions
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 5ab1a4598d00..a03aa6f999d1 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -158,13 +158,13 @@ static inline bool acpi_has_cpu_in_madt(void)
 }
 
 #define ACPI_HAVE_ARCH_SET_ROOT_POINTER
-static inline void acpi_arch_set_root_pointer(u64 addr)
+static __always_inline void acpi_arch_set_root_pointer(u64 addr)
 {
	x86_init.acpi.set_root_pointer(addr);
 }
 
 #define ACPI_HAVE_ARCH_GET_ROOT_POINTER
-static inline u64 acpi_arch_get_root_pointer(void)
+static __always_inline u64 acpi_arch_get_root_pointer(void)
 {
	return x86_init.acpi.get_root_pointer();
 }
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 23d86c9750b9..07ba4935e873 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -488,11 +488,14 @@ static inline void apic_setup_apic_calls(void) { }
 
 extern void apic_ack_irq(struct irq_data *data);
 
+#define APIC_VECTOR_TO_BIT_NUMBER(v)	((unsigned int)(v) % 32)
+#define APIC_VECTOR_TO_REG_OFFSET(v)	((unsigned int)(v) / 32 * 0x10)
+
 static inline bool lapic_vector_set_in_irr(unsigned int vector)
 {
-	u32 irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+	u32 irr = apic_read(APIC_IRR + APIC_VECTOR_TO_REG_OFFSET(vector));
 
-	return !!(irr & (1U << (vector % 32)));
+	return !!(irr & (1U << APIC_VECTOR_TO_BIT_NUMBER(vector)));
 }
 
 static inline bool is_vector_pending(unsigned int vector)
@@ -500,6 +503,65 @@ static inline bool is_vector_pending(unsigned int vector)
	return lapic_vector_set_in_irr(vector) || pi_pending_this_cpu(vector);
 }
 
+#define MAX_APIC_VECTOR		256
+#define APIC_VECTORS_PER_REG	32
+
+/*
+ * Vector states are maintained by APIC in 32-bit registers that are
+ * 16 bytes aligned. The status of each vector is kept in a single
+ * bit.
+ */
+static inline int apic_find_highest_vector(void *bitmap)
+{
+	int vec;
+	u32 *reg;
+
+	for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG; vec >= 0; vec -= APIC_VECTORS_PER_REG) {
+		reg = bitmap + APIC_VECTOR_TO_REG_OFFSET(vec);
+		if (*reg)
+			return __fls(*reg) + vec;
+	}
+
+	return -1;
+}
+
+static inline u32 apic_get_reg(void *regs, int reg)
+{
+	return *((u32 *) (regs + reg));
+}
+
+static inline void apic_set_reg(void *regs, int reg, u32 val)
+{
+	*((u32 *) (regs + reg)) = val;
+}
+
+static __always_inline u64 apic_get_reg64(void *regs, int reg)
+{
+	BUILD_BUG_ON(reg != APIC_ICR);
+	return *((u64 *) (regs + reg));
+}
+
+static __always_inline void apic_set_reg64(void *regs, int reg, u64 val)
+{
+	BUILD_BUG_ON(reg != APIC_ICR);
+	*((u64 *) (regs + reg)) = val;
+}
+
+static inline void apic_clear_vector(int vec, void *bitmap)
+{
+	clear_bit(APIC_VECTOR_TO_BIT_NUMBER(vec), bitmap + APIC_VECTOR_TO_REG_OFFSET(vec));
+}
+
+static inline void apic_set_vector(int vec, void *bitmap)
+{
+	set_bit(APIC_VECTOR_TO_BIT_NUMBER(vec), bitmap + APIC_VECTOR_TO_REG_OFFSET(vec));
+}
+
+static inline int apic_test_vector(int vec, void *bitmap)
+{
+	return test_bit(APIC_VECTOR_TO_BIT_NUMBER(vec), bitmap + APIC_VECTOR_TO_REG_OFFSET(vec));
+}
+
 /*
  * Warm reset vector position:
  */
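The two macros introduced above pin down the LAPIC register layout: 256 vectors are tracked in eight 32-bit registers spaced at a 16-byte stride, so a vector maps to register offset (v / 32) * 0x10, bit v % 32, and apic_find_highest_vector() simply scans the registers from the top down. A minimal user-space sketch of the same arithmetic (illustrative only, not part of the patch; the bitmap and the __fls() helper are stand-ins, using the GCC/Clang __builtin_clz()):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#define APIC_VECTOR_TO_BIT_NUMBER(v)  ((unsigned int)(v) % 32)
	#define APIC_VECTOR_TO_REG_OFFSET(v)  ((unsigned int)(v) / 32 * 0x10)

	#define MAX_APIC_VECTOR      256
	#define APIC_VECTORS_PER_REG 32

	/* Stand-in for the kernel's __fls(): index of the highest set bit. */
	static int fls_u32(uint32_t x) { return 31 - __builtin_clz(x); }

	/* Same scan as apic_find_highest_vector(): top register down. */
	static int find_highest_vector(const void *bitmap)
	{
		uint32_t reg;
		int vec;

		for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG; vec >= 0;
		     vec -= APIC_VECTORS_PER_REG) {
			memcpy(&reg, (const char *)bitmap + APIC_VECTOR_TO_REG_OFFSET(vec),
			       sizeof(reg));
			if (reg)
				return fls_u32(reg) + vec;
		}
		return -1;
	}

	int main(void)
	{
		unsigned char irr[8 * 0x10] = {0};	/* 8 regs at a 16-byte stride */
		unsigned int vec = 0x61;		/* 97: reg offset 0x30, bit 1 */
		uint32_t reg = 1u << APIC_VECTOR_TO_BIT_NUMBER(vec);

		memcpy(irr + APIC_VECTOR_TO_REG_OFFSET(vec), &reg, sizeof(reg));
		printf("offset=%#x bit=%u highest=%#x\n",
		       APIC_VECTOR_TO_REG_OFFSET(vec),
		       APIC_VECTOR_TO_BIT_NUMBER(vec),
		       (unsigned int)find_highest_vector(irr));
		return 0;	/* prints: offset=0x30 bit=1 highest=0x61 */
	}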
diff --git a/arch/x86/include/asm/ce4100.h b/arch/x86/include/asm/ce4100.h
index 2930f560d7f3..e1f965bb1e31 100644
--- a/arch/x86/include/asm/ce4100.h
+++ b/arch/x86/include/asm/ce4100.h
@@ -4,4 +4,10 @@
 
 int ce4100_pci_init(void);
 
+#ifdef CONFIG_SERIAL_8250
+void __init sdv_serial_fixup(void);
+#else
+static inline void sdv_serial_fixup(void) {};
+#endif
+
 #endif
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index ee176236c2be..602957dd2609 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -456,10 +456,14 @@
 #define X86_FEATURE_NO_NESTED_DATA_BP	(20*32+ 0) /* No Nested Data Breakpoints */
 #define X86_FEATURE_WRMSR_XX_BASE_NS	(20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
 #define X86_FEATURE_LFENCE_RDTSC	(20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */
+#define X86_FEATURE_VERW_CLEAR		(20*32+ 5) /* The memory form of VERW mitigates TSA */
 #define X86_FEATURE_NULL_SEL_CLR_BASE	(20*32+ 6) /* Null Selector Clears Base */
+
 #define X86_FEATURE_AUTOIBRS		(20*32+ 8) /* Automatic IBRS */
 #define X86_FEATURE_NO_SMM_CTL_MSR	(20*32+ 9) /* SMM_CTL MSR is not present */
+#define X86_FEATURE_GP_ON_USER_CPUID	(20*32+17) /* User CPUID faulting */
+
 #define X86_FEATURE_PREFETCHI		(20*32+20) /* Prefetch Data/Instruction to Cache Level */
 #define X86_FEATURE_SBPB		(20*32+27) /* Selective Branch Prediction Barrier */
 #define X86_FEATURE_IBPB_BRTYPE		(20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
@@ -487,6 +491,9 @@
 #define X86_FEATURE_PREFER_YMM		(21*32+ 8) /* Avoid ZMM registers due to downclocking */
 #define X86_FEATURE_APX			(21*32+ 9) /* Advanced Performance Extensions */
 #define X86_FEATURE_INDIRECT_THUNK_ITS	(21*32+10) /* Use thunk for indirect branches in lower half of cacheline */
+#define X86_FEATURE_TSA_SQ_NO		(21*32+11) /* AMD CPU not vulnerable to TSA-SQ */
+#define X86_FEATURE_TSA_L1_NO		(21*32+12) /* AMD CPU not vulnerable to TSA-L1 */
+#define X86_FEATURE_CLEAR_CPU_BUF_VM	(21*32+13) /* Clear CPU buffers using VERW before VMRUN */
 
 /*
  * BUG word(s)
@@ -542,5 +549,5 @@
 #define X86_BUG_OLD_MICROCODE		X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */
 #define X86_BUG_ITS			X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */
 #define X86_BUG_ITS_NATIVE_ONLY		X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
-
+#define X86_BUG_TSA			X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 1c94121acd3d..93e99d2583d6 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -118,7 +118,7 @@ enum xfeature {
	XFEATURE_PKRU,
	XFEATURE_PASID,
	XFEATURE_CET_USER,
-	XFEATURE_CET_KERNEL_UNUSED,
+	XFEATURE_CET_KERNEL,
	XFEATURE_RSRVD_COMP_13,
	XFEATURE_RSRVD_COMP_14,
	XFEATURE_LBR,
@@ -142,7 +142,7 @@ enum xfeature {
 #define XFEATURE_MASK_PKRU		(1 << XFEATURE_PKRU)
 #define XFEATURE_MASK_PASID		(1 << XFEATURE_PASID)
 #define XFEATURE_MASK_CET_USER		(1 << XFEATURE_CET_USER)
-#define XFEATURE_MASK_CET_KERNEL	(1 << XFEATURE_CET_KERNEL_UNUSED)
+#define XFEATURE_MASK_CET_KERNEL	(1 << XFEATURE_CET_KERNEL)
 #define XFEATURE_MASK_LBR		(1 << XFEATURE_LBR)
 #define XFEATURE_MASK_XTILE_CFG		(1 << XFEATURE_XTILE_CFG)
 #define XFEATURE_MASK_XTILE_DATA	(1 << XFEATURE_XTILE_DATA)
@@ -269,6 +269,16 @@ struct cet_user_state {
 };
 
 /*
+ * State component 12 is Control-flow Enforcement supervisor states.
+ * This state includes SSP pointers for privilege levels 0 through 2.
+ */
+struct cet_supervisor_state {
+	u64 pl0_ssp;
+	u64 pl1_ssp;
+	u64 pl2_ssp;
+} __packed;
+
+/*
  * State component 15: Architectural LBR configuration state.
  * The size of Arch LBR state depends on the number of LBRs (lbr_depth).
  */
@@ -552,6 +562,31 @@ struct fpu_guest {
 };
 
 /*
+ * FPU state configuration data for fpu_guest.
+ * Initialized at boot time. Read only after init.
+ */
+struct vcpu_fpu_config {
+	/*
+	 * @size:
+	 *
+	 * The default size of the register state buffer in guest FPUs.
+	 * Includes all supported features except independent managed
+	 * features and features which have to be requested by user space
+	 * before usage.
+	 */
+	unsigned int size;
+
+	/*
+	 * @features:
+	 *
+	 * The default supported features bitmap in guest FPUs. Does not
+	 * include independent managed features and features which have to
+	 * be requested by user space before usage.
+	 */
+	u64 features;
+};
+
+/*
  * FPU state configuration data. Initialized at boot time. Read only after init.
  */
 struct fpu_state_config {
@@ -567,8 +602,9 @@ struct fpu_state_config {
	 * @default_size:
	 *
	 * The default size of the register state buffer. Includes all
-	 * supported features except independent managed features and
-	 * features which have to be requested by user space before usage.
+	 * supported features except independent managed features,
+	 * guest-only features and features which have to be requested by
+	 * user space before usage.
	 */
	unsigned int default_size;
 
@@ -584,8 +620,8 @@ struct fpu_state_config {
	 * @default_features:
	 *
	 * The default supported features bitmap. Does not include
-	 * independent managed features and features which have to
-	 * be requested by user space before usage.
+	 * independent managed features, guest-only features and features
+	 * which have to be requested by user space before usage.
	 */
	u64 default_features;
 
@@ -606,5 +642,6 @@ struct fpu_state_config {
 
 /* FPU state configuration information */
 extern struct fpu_state_config fpu_kernel_cfg, fpu_user_cfg;
+extern struct vcpu_fpu_config guest_default_cfg;
 
 #endif /* _ASM_X86_FPU_TYPES_H */
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index b308a76afbb7..7a7dc9d56027 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -46,9 +46,13 @@
 /* Features which are dynamically enabled for a process on request */
 #define XFEATURE_MASK_USER_DYNAMIC	XFEATURE_MASK_XTILE_DATA
 
+/* Supervisor features which are enabled only in guest FPUs */
+#define XFEATURE_MASK_GUEST_SUPERVISOR	XFEATURE_MASK_CET_KERNEL
+
 /* All currently supported supervisor features */
 #define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID | \
-					    XFEATURE_MASK_CET_USER)
+					    XFEATURE_MASK_CET_USER | \
+					    XFEATURE_MASK_GUEST_SUPERVISOR)
 
 /*
  * A supervisor state component may not always contain valuable information,
@@ -75,8 +79,7 @@
  * Unsupported supervisor features. When a supervisor feature in this mask is
  * supported in the future, move it to the supported supervisor feature mask.
  */
-#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT | \
-					      XFEATURE_MASK_CET_KERNEL)
+#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT)
 
 /* All supervisor states including supported and unsupported states. */
 #define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \
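Net effect of the two FPU header changes above: CET_KERNEL (state component 12, per the comment added in fpu/types.h) stops being "unused" and becomes a guest-only supervisor feature, i.e. part of the supported supervisor mask but excluded from the host default masks. A small self-contained sketch of the resulting mask arithmetic (the xfeature component numbers are reproduced here from the kernel's enum xfeature; not part of the patch):

	#include <assert.h>
	#include <stdio.h>

	/* Component numbers from enum xfeature; CET_KERNEL is component 12. */
	enum { XFEATURE_PT = 8, XFEATURE_PASID = 10,
	       XFEATURE_CET_USER = 11, XFEATURE_CET_KERNEL = 12 };

	#define XFEATURE_MASK_PT         (1u << XFEATURE_PT)
	#define XFEATURE_MASK_PASID      (1u << XFEATURE_PASID)
	#define XFEATURE_MASK_CET_USER   (1u << XFEATURE_CET_USER)
	#define XFEATURE_MASK_CET_KERNEL (1u << XFEATURE_CET_KERNEL)

	/* Mirrors the xstate.h hunk above. */
	#define XFEATURE_MASK_GUEST_SUPERVISOR XFEATURE_MASK_CET_KERNEL
	#define XFEATURE_MASK_SUPERVISOR_SUPPORTED \
		(XFEATURE_MASK_PASID | XFEATURE_MASK_CET_USER | \
		 XFEATURE_MASK_GUEST_SUPERVISOR)
	#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT)

	int main(void)
	{
		/* CET_KERNEL moved from the unsupported to the supported set. */
		assert(XFEATURE_MASK_SUPERVISOR_SUPPORTED & XFEATURE_MASK_CET_KERNEL);
		assert(!(XFEATURE_MASK_SUPERVISOR_UNSUPPORTED & XFEATURE_MASK_CET_KERNEL));
		printf("supported=%#x unsupported=%#x\n",
		       XFEATURE_MASK_SUPERVISOR_SUPPORTED,
		       XFEATURE_MASK_SUPERVISOR_UNSUPPORTED);
		return 0;	/* supported=0x1c00 unsupported=0x100 */
	}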
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 8b1b1abcef15..5a68e9db6518 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -5,7 +5,7 @@
 #if defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 170000
 #define __head __section(".head.text") __no_sanitize_undefined __no_stack_protector
 #else
-#define __head __section(".head.text") __no_sanitize_undefined
+#define __head __section(".head.text") __no_sanitize_undefined __no_kstack_erase
 #endif
 
 struct x86_mapping_info {
diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h
index 43b7657febca..944637a4e6de 100644
--- a/arch/x86/include/asm/intel_telemetry.h
+++ b/arch/x86/include/asm/intel_telemetry.h
@@ -59,18 +59,6 @@ struct telemetry_plt_config {
 };
 
 struct telemetry_core_ops {
-	int (*get_sampling_period)(u8 *pss_min_period, u8 *pss_max_period,
-				   u8 *ioss_min_period, u8 *ioss_max_period);
-
-	int (*get_eventconfig)(struct telemetry_evtconfig *pss_evtconfig,
-			       struct telemetry_evtconfig *ioss_evtconfig,
-			       int pss_len, int ioss_len);
-
-	int (*update_events)(struct telemetry_evtconfig pss_evtconfig,
-			     struct telemetry_evtconfig ioss_evtconfig);
-
-	int (*set_sampling_period)(u8 pss_period, u8 ioss_period);
-
	int (*get_trace_verbosity)(enum telemetry_unit telem_unit,
				   u32 *verbosity);
 
@@ -84,11 +72,6 @@ struct telemetry_core_ops {
	int (*read_eventlog)(enum telemetry_unit telem_unit,
			     struct telemetry_evtlog *evtlog,
			     int len, int log_all_evts);
-
-	int (*add_events)(u8 num_pss_evts, u8 num_ioss_evts,
-			  u32 *pss_evtmap, u32 *ioss_evtmap);
-
-	int (*reset_events)(void);
 };
 
 int telemetry_set_pltdata(const struct telemetry_core_ops *ops,
@@ -101,35 +84,15 @@ struct telemetry_plt_config *telemetry_get_pltdata(void);
 int telemetry_get_evtname(enum telemetry_unit telem_unit,
			  const char **name, int len);
 
-int telemetry_update_events(struct telemetry_evtconfig pss_evtconfig,
-			    struct telemetry_evtconfig ioss_evtconfig);
-
-int telemetry_add_events(u8 num_pss_evts, u8 num_ioss_evts,
-			 u32 *pss_evtmap, u32 *ioss_evtmap);
-
-int telemetry_reset_events(void);
-
-int telemetry_get_eventconfig(struct telemetry_evtconfig *pss_config,
-			      struct telemetry_evtconfig *ioss_config,
-			      int pss_len, int ioss_len);
-
 int telemetry_read_events(enum telemetry_unit telem_unit,
			  struct telemetry_evtlog *evtlog, int len);
 
-int telemetry_raw_read_events(enum telemetry_unit telem_unit,
-			      struct telemetry_evtlog *evtlog, int len);
-
 int telemetry_read_eventlog(enum telemetry_unit telem_unit,
			    struct telemetry_evtlog *evtlog, int len);
 
 int telemetry_raw_read_eventlog(enum telemetry_unit telem_unit,
				struct telemetry_evtlog *evtlog, int len);
 
-int telemetry_get_sampling_period(u8 *pss_min_period, u8 *pss_max_period,
-				  u8 *ioss_min_period, u8 *ioss_max_period);
-
-int telemetry_set_sampling_period(u8 pss_period, u8 ioss_period);
-
 int telemetry_set_trace_verbosity(enum telemetry_unit telem_unit,
				  u32 verbosity);
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 5036f13ab69f..5a0d42464d44 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -26,7 +26,22 @@ enum {
	IRQ_REMAP_X2APIC_MODE,
 };
 
-struct vcpu_data {
+/*
+ * This is mainly used to communicate information back-and-forth
+ * between SVM and IOMMU for setting up and tearing down posted
+ * interrupt
+ */
+struct amd_iommu_pi_data {
+	u64 vapic_addr;		/* Physical address of the vCPU's vAPIC. */
+	u32 ga_tag;
+	u32 vector;		/* Guest vector of the interrupt */
+	int cpu;
+	bool ga_log_intr;
+	bool is_guest_mode;
+	void *ir_data;
+};
+
+struct intel_iommu_pi_data {
	u64 pi_desc_addr;	/* Physical address of PI Descriptor */
	u32 vector;		/* Guest vector of the interrupt */
 };
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 9a9b21b78905..b30e5474c18e 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -44,13 +44,13 @@ static __always_inline void native_irq_enable(void)
 
 static __always_inline void native_safe_halt(void)
 {
-	mds_idle_clear_cpu_buffers();
+	x86_idle_clear_cpu_buffers();
	asm volatile("sti; hlt": : :"memory");
 }
 
 static __always_inline void native_halt(void)
 {
-	mds_idle_clear_cpu_buffers();
+	x86_idle_clear_cpu_buffers();
	asm volatile("hlt": : :"memory");
 }
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 8d50e3e0a19b..18a5c3119e1a 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -49,7 +49,6 @@ KVM_X86_OP(set_idt)
 KVM_X86_OP(get_gdt)
 KVM_X86_OP(set_gdt)
 KVM_X86_OP(sync_dirty_debug_regs)
-KVM_X86_OP(set_dr6)
 KVM_X86_OP(set_dr7)
 KVM_X86_OP(cache_reg)
 KVM_X86_OP(get_rflags)
@@ -112,7 +111,7 @@ KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
 KVM_X86_OP_OPTIONAL(vcpu_blocking)
 KVM_X86_OP_OPTIONAL(vcpu_unblocking)
 KVM_X86_OP_OPTIONAL(pi_update_irte)
-KVM_X86_OP_OPTIONAL(pi_start_assignment)
+KVM_X86_OP_OPTIONAL(pi_start_bypass)
 KVM_X86_OP_OPTIONAL(apicv_pre_state_restore)
 KVM_X86_OP_OPTIONAL(apicv_post_state_restore)
 KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt)
@@ -139,7 +138,7 @@ KVM_X86_OP(check_emulate_instruction)
 KVM_X86_OP(apic_init_signal_blocked)
 KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush)
 KVM_X86_OP_OPTIONAL(migrate_timers)
-KVM_X86_OP(msr_filter_changed)
+KVM_X86_OP(recalc_msr_intercepts)
 KVM_X86_OP(complete_emulated_msr)
 KVM_X86_OP(vcpu_deliver_sipi_vector)
 KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 639d9bcee842..f19a76d3ca0e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -297,6 +297,7 @@ enum x86_intercept_stage;
  */
 #define KVM_APIC_PV_EOI_PENDING	1
 
+struct kvm_kernel_irqfd;
 struct kvm_kernel_irq_routing_entry;
 
 /*
@@ -700,8 +701,13 @@ struct kvm_vcpu_hv {
 
	struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS];
 
-	/* Preallocated buffer for handling hypercalls passing sparse vCPU set */
+	/*
+	 * Preallocated buffers for handling hypercalls that pass sparse vCPU
+	 * sets (for high vCPU counts, they're too large to comfortably fit on
+	 * the stack).
+	 */
	u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS];
+	DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
 
	struct hv_vp_assist_page vp_assist_page;
 
@@ -764,6 +770,7 @@ enum kvm_only_cpuid_leafs {
	CPUID_8000_0022_EAX,
	CPUID_7_2_EDX,
	CPUID_24_0_EBX,
+	CPUID_8000_0021_ECX,
	NR_KVM_CPU_CAPS,
 
	NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
@@ -1314,6 +1321,12 @@ enum kvm_apicv_inhibit {
	 */
	APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED,
 
+	/*
+	 * AVIC is disabled because the vCPU's APIC ID is beyond the max
+	 * supported by AVIC/x2AVIC, i.e. the vCPU is unaddressable.
+	 */
+	APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG,
+
	NR_APICV_INHIBIT_REASONS,
 };
 
@@ -1332,7 +1345,8 @@ enum kvm_apicv_inhibit {
	__APICV_INHIBIT_REASON(IRQWIN),			\
	__APICV_INHIBIT_REASON(PIT_REINJ),		\
	__APICV_INHIBIT_REASON(SEV),			\
-	__APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED)
+	__APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED),	\
+	__APICV_INHIBIT_REASON(PHYSICAL_ID_TOO_BIG)
 
 struct kvm_arch {
	unsigned long n_used_mmu_pages;
@@ -1344,7 +1358,7 @@ struct kvm_arch {
	bool has_private_mem;
	bool has_protected_state;
	bool pre_fault_allowed;
-	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
+	struct hlist_head *mmu_page_hash;
	struct list_head active_mmu_pages;
	/*
	 * A list of kvm_mmu_page structs that, if zapped, could possibly be
@@ -1373,11 +1387,13 @@ struct kvm_arch {
 #define __KVM_HAVE_ARCH_NONCOHERENT_DMA
	atomic_t noncoherent_dma_count;
-#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
-	atomic_t assigned_device_count;
+
+	unsigned long nr_possible_bypass_irqs;
+
+#ifdef CONFIG_KVM_IOAPIC
	struct kvm_pic *vpic;
	struct kvm_ioapic *vioapic;
	struct kvm_pit *vpit;
+#endif
	atomic_t vapics_in_nmi_mode;
	struct mutex apic_map_lock;
	struct kvm_apic_map __rcu *apic_map;
@@ -1392,12 +1408,8 @@ struct kvm_arch {
 
	gpa_t wall_clock;
 
-	bool mwait_in_guest;
-	bool hlt_in_guest;
-	bool pause_in_guest;
-	bool cstate_in_guest;
+	u64 disabled_exits;
 
-	unsigned long irq_sources_bitmap;
	s64 kvmclock_offset;
 
	/*
@@ -1426,9 +1438,6 @@ struct kvm_arch {
	struct delayed_work kvmclock_update_work;
	struct delayed_work kvmclock_sync_work;
 
-	/* reads protected by irq_srcu, writes by irq_lock */
-	struct hlist_head mask_notifier_list;
-
 #ifdef CONFIG_KVM_HYPERV
	struct kvm_hv hyperv;
 #endif
@@ -1451,6 +1460,7 @@ struct kvm_arch {
 
	bool x2apic_format;
	bool x2apic_broadcast_quirk_disabled;
+	bool has_mapped_host_mmio;
 
	bool guest_can_read_msr_platform_info;
	bool exception_payload_enabled;
@@ -1674,6 +1684,12 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
 {
	return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
 }
 
+enum kvm_x86_run_flags {
+	KVM_RUN_FORCE_IMMEDIATE_EXIT	= BIT(0),
+	KVM_RUN_LOAD_GUEST_DR6		= BIT(1),
+	KVM_RUN_LOAD_DEBUGCTL		= BIT(2),
+};
+
 struct kvm_x86_ops {
	const char *name;
 
@@ -1702,6 +1718,12 @@ struct kvm_x86_ops {
	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
	void (*vcpu_put)(struct kvm_vcpu *vcpu);
 
+	/*
+	 * Mask of DEBUGCTL bits that are owned by the host, i.e. that need to
+	 * match the host's value even while the guest is active.
+	 */
+	const u64 HOST_OWNED_DEBUGCTL;
+
	void (*update_exception_bitmap)(struct kvm_vcpu *vcpu);
	int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
	int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
@@ -1724,7 +1746,6 @@ struct kvm_x86_ops {
	void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
	void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
	void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
-	void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
	void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
	void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
@@ -1755,7 +1776,7 @@ struct kvm_x86_ops {
 
	int (*vcpu_pre_run)(struct kvm_vcpu *vcpu);
	enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu,
-						  bool force_immediate_exit);
+						  u64 run_flags);
	int (*handle_exit)(struct kvm_vcpu *vcpu,
			   enum exit_fastpath_completion exit_fastpath);
	int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
@@ -1847,9 +1868,10 @@ struct kvm_x86_ops {
 
	void (*vcpu_blocking)(struct kvm_vcpu *vcpu);
	void (*vcpu_unblocking)(struct kvm_vcpu *vcpu);
 
-	int (*pi_update_irte)(struct kvm *kvm, unsigned int host_irq,
-			      uint32_t guest_irq, bool set);
-	void (*pi_start_assignment)(struct kvm *kvm);
+	int (*pi_update_irte)(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
+			      unsigned int host_irq, uint32_t guest_irq,
+			      struct kvm_vcpu *vcpu, u32 vector);
+	void (*pi_start_bypass)(struct kvm *kvm);
	void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu);
	void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
	bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
@@ -1886,7 +1908,7 @@ struct kvm_x86_ops {
	int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu);
 
	void (*migrate_timers)(struct kvm_vcpu *vcpu);
-	void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
+	void (*recalc_msr_intercepts)(struct kvm_vcpu *vcpu);
	int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
 
	void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
@@ -1944,6 +1966,7 @@ struct kvm_arch_async_pf {
 extern u32 __read_mostly kvm_nr_uret_msrs;
 extern bool __read_mostly allow_smaller_maxphyaddr;
 extern bool __read_mostly enable_apicv;
+extern bool __read_mostly enable_ipiv;
 extern bool __read_mostly enable_device_posted_irqs;
 extern struct kvm_x86_ops kvm_x86_ops;
 
@@ -1962,7 +1985,7 @@ void kvm_x86_vendor_exit(void);
 #define __KVM_HAVE_ARCH_VM_ALLOC
 static inline struct kvm *kvm_arch_alloc_vm(void)
 {
-	return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+	return kvzalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT);
 }
 
 #define __KVM_HAVE_ARCH_VM_FREE
@@ -2007,7 +2030,7 @@ void kvm_mmu_vendor_module_exit(void);
 
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
-void kvm_mmu_init_vm(struct kvm *kvm);
+int kvm_mmu_init_vm(struct kvm *kvm);
 void kvm_mmu_uninit_vm(struct kvm *kvm);
 
 void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm,
@@ -2038,19 +2061,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
 int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
			const void *val, int bytes);
 
-struct kvm_irq_mask_notifier {
-	void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
-	int irq;
-	struct hlist_node link;
-};
-
-void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
-				    struct kvm_irq_mask_notifier *kimn);
-void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
-				      struct kvm_irq_mask_notifier *kimn);
-void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
-			     bool mask);
-
 extern bool tdp_enabled;
 
 u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
@@ -2209,9 +2219,6 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state,
	return !!(*irq_state);
 }
 
-int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
-void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
-
 void kvm_inject_nmi(struct kvm_vcpu *vcpu);
 int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);
 
@@ -2388,9 +2395,6 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
 bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
			     struct kvm_vcpu **dest_vcpu);
 
-void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
-		     struct kvm_lapic_irq *irq);
-
 static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
 {
	/* We can only post Fixed and LowPrio IRQs */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index e1752ba47e67..abc4659f5809 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -112,12 +112,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
	return hv_status;
 }
 
-/* Hypercall to the L0 hypervisor */
-static inline u64 hv_do_nested_hypercall(u64 control, void *input, void *output)
-{
-	return hv_do_hypercall(control | HV_HYPERCALL_NESTED, input, output);
-}
-
 /* Fast hypercall with 8 bytes of input and no output */
 static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
 {
@@ -165,13 +159,6 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
	return _hv_do_fast_hypercall8(control, input1);
 }
 
-static inline u64 hv_do_fast_nested_hypercall8(u16 code, u64 input1)
-{
-	u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
-
-	return _hv_do_fast_hypercall8(control, input1);
-}
-
 /* Fast hypercall with 16 bytes of input */
 static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
 {
@@ -223,13 +210,6 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
	return _hv_do_fast_hypercall16(control, input1, input2);
 }
 
-static inline u64 hv_do_fast_nested_hypercall16(u16 code, u64 input1, u64 input2)
-{
-	u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
-
-	return _hv_do_fast_hypercall16(control, input1, input2);
-}
-
 extern struct hv_vp_assist_page **hv_vp_assist_page;
 
 static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
@@ -262,6 +242,8 @@ static inline void hv_apic_init(void) {}
 
 struct irq_domain *hv_create_pci_msi_domain(void);
 
+int hv_map_msi_interrupt(struct irq_data *data,
+			 struct hv_interrupt_entry *out_entry);
 int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
		struct hv_interrupt_entry *entry);
 int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b7dded3c8113..b65c3ba5fa14 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -419,6 +419,7 @@
 #define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI	(1UL << 12)
 #define DEBUGCTLMSR_FREEZE_IN_SMM_BIT		14
 #define DEBUGCTLMSR_FREEZE_IN_SMM		(1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
+#define DEBUGCTLMSR_RTM_DEBUG			BIT(15)
 
 #define MSR_PEBS_FRONTEND		0x000003f7
 
@@ -628,6 +629,7 @@
 #define MSR_AMD64_OSVW_STATUS		0xc0010141
 #define MSR_AMD_PPIN_CTL		0xc00102f0
 #define MSR_AMD_PPIN			0xc00102f1
+#define MSR_AMD64_CPUID_FN_7		0xc0011002
 #define MSR_AMD64_CPUID_FN_1		0xc0011004
 #define MSR_AMD64_LS_CFG		0xc0011020
 #define MSR_AMD64_DC_CFG		0xc0011022
@@ -732,6 +734,11 @@
 #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL		0xc0000301
 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR	0xc0000302
 
+/* AMD Hardware Feedback Support MSRs */
+#define MSR_AMD_WORKLOAD_CLASS_CONFIG		0xc0000500
+#define MSR_AMD_WORKLOAD_CLASS_ID		0xc0000501
+#define MSR_AMD_WORKLOAD_HRST			0xc0000502
+
 /* AMD Last Branch Record MSRs */
 #define MSR_AMD64_LBR_SELECT			0xc000010e
 
@@ -830,6 +837,7 @@
 #define MSR_K7_HWCR_SMMLOCK		BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
 #define MSR_K7_HWCR_IRPERF_EN_BIT	30
 #define MSR_K7_HWCR_IRPERF_EN		BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
+#define MSR_K7_HWCR_CPUID_USER_DIS_BIT	35
 #define MSR_K7_FID_VID_CTL		0xc0010041
 #define MSR_K7_FID_VID_STATUS		0xc0010042
 #define MSR_K7_HWCR_CPB_DIS_BIT		25
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index dd2b129b0418..6ca6516c7492 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -43,8 +43,6 @@ static __always_inline void __monitorx(const void *eax, u32 ecx, u32 edx)
 
 static __always_inline void __mwait(u32 eax, u32 ecx)
 {
-	mds_idle_clear_cpu_buffers();
-
	/*
	 * Use the instruction mnemonic with implicit operands, as the LLVM
	 * assembler fails to assemble the mnemonic with explicit operands:
@@ -80,7 +78,7 @@ static __always_inline void __mwait(u32 eax, u32 ecx)
  */
 static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx)
 {
-	/* No MDS buffer clear as this is AMD/HYGON only */
+	/* No need for TSA buffer clearing on AMD */
 
	/* "mwaitx %eax, %ebx, %ecx" */
	asm volatile(".byte 0x0f, 0x01, 0xfb"
@@ -98,7 +96,6 @@ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx)
  */
 static __always_inline void __sti_mwait(u32 eax, u32 ecx)
 {
-	mds_idle_clear_cpu_buffers();
 
	asm volatile("sti; mwait" :: "a" (eax), "c" (ecx));
 }
@@ -115,21 +112,29 @@ static __always_inline void __sti_mwait(u32 eax, u32 ecx)
  */
 static __always_inline void mwait_idle_with_hints(u32 eax, u32 ecx)
 {
+	if (need_resched())
+		return;
+
+	x86_idle_clear_cpu_buffers();
+
	if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) {
		const void *addr = &current_thread_info()->flags;
 
		alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr));
		__monitor(addr, 0, 0);
 
-		if (!need_resched()) {
-			if (ecx & 1) {
-				__mwait(eax, ecx);
-			} else {
-				__sti_mwait(eax, ecx);
-				raw_local_irq_disable();
-			}
+		if (need_resched())
+			goto out;
+
+		if (ecx & 1) {
+			__mwait(eax, ecx);
+		} else {
+			__sti_mwait(eax, ecx);
+			raw_local_irq_disable();
		}
	}
+
+out:
	current_clr_polling();
 }
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 20d754b98f3f..10f261678749 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -302,25 +302,31 @@
 .endm
 
 /*
- * Macro to execute VERW instruction that mitigate transient data sampling
- * attacks such as MDS. On affected systems a microcode update overloaded VERW
- * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
- *
+ * Macro to execute VERW insns that mitigate transient data sampling
+ * attacks such as MDS or TSA. On affected systems a microcode update
+ * overloaded VERW insns to also clear the CPU buffers. VERW clobbers
+ * CFLAGS.ZF.
  * Note: Only the memory operand variant of VERW clears the CPU buffers.
  */
-.macro CLEAR_CPU_BUFFERS
+.macro __CLEAR_CPU_BUFFERS feature
 #ifdef CONFIG_X86_64
-	ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF
+	ALTERNATIVE "", "verw x86_verw_sel(%rip)", \feature
 #else
	/*
	 * In 32bit mode, the memory operand must be a %cs reference. The data
	 * segments may not be usable (vm86 mode), and the stack segment may not
	 * be flat (ESPFIX32).
	 */
-	ALTERNATIVE "", "verw %cs:mds_verw_sel", X86_FEATURE_CLEAR_CPU_BUF
+	ALTERNATIVE "", "verw %cs:x86_verw_sel", \feature
 #endif
 .endm
 
+#define CLEAR_CPU_BUFFERS \
+	__CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF
+
+#define VM_CLEAR_CPU_BUFFERS \
+	__CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF_VM
+
 #ifdef CONFIG_X86_64
 .macro CLEAR_BRANCH_HISTORY
	ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
@@ -567,24 +573,24 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
 
 DECLARE_STATIC_KEY_FALSE(switch_vcpu_ibpb);
 
-DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
+DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear);
 
 DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
 
 DECLARE_STATIC_KEY_FALSE(cpu_buf_vm_clear);
 
-extern u16 mds_verw_sel;
+extern u16 x86_verw_sel;
 
 #include <asm/segment.h>
 
 /**
- * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
+ * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns
  *
  * This uses the otherwise unused and obsolete VERW instruction in
  * combination with microcode which triggers a CPU buffer flush when the
  * instruction is executed.
  */
-static __always_inline void mds_clear_cpu_buffers(void)
+static __always_inline void x86_clear_cpu_buffers(void)
 {
	static const u16 ds = __KERNEL_DS;
 
@@ -601,14 +607,15 @@ static __always_inline void mds_clear_cpu_buffers(void)
 }
 
 /**
- * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
+ * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS
+ *				and TSA vulnerabilities.
  *
  * Clear CPU buffers if the corresponding static key is enabled
  */
-static __always_inline void mds_idle_clear_cpu_buffers(void)
+static __always_inline void x86_idle_clear_cpu_buffers(void)
 {
-	if (static_branch_likely(&mds_idle_clear))
-		mds_clear_cpu_buffers();
+	if (static_branch_likely(&cpu_buf_idle_clear))
+		x86_clear_cpu_buffers();
 }
 
 #endif /* __ASSEMBLER__ */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index b74ec5c3643b..a5731fb1e9dd 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -214,9 +214,6 @@ enum page_cache_mode {
 #define PAGE_READONLY		__pg(__PP|   0|_USR|___A|__NX|   0|   0|   0)
 #define PAGE_READONLY_EXEC	__pg(__PP|   0|_USR|___A|   0|   0|   0|   0)
 
-#define __PAGE_KERNEL		(__PP|__RW|   0|___A|__NX|___D|   0|___G)
-#define __PAGE_KERNEL_EXEC	(__PP|__RW|   0|___A|   0|___D|   0|___G)
-
 /*
  * Page tables needs to have Write=1 in order for any lower PTEs to be
  * writable. This includes shadow stack memory (Write=0, Dirty=1)
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index f607081a022a..e406a1e92c63 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -78,7 +78,7 @@ extern unsigned char secondary_startup_64[];
 extern unsigned char secondary_startup_64_no_verify[];
 #endif
 
-static inline size_t real_mode_size_needed(void)
+static __always_inline size_t real_mode_size_needed(void)
 {
	if (real_mode_header)
		return 0;	/* already allocated. */
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 58e028d42e41..89075ff19afa 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -223,6 +223,18 @@ struct snp_tsc_info_resp {
	u8 rsvd2[100];
 } __packed;
 
+/*
+ * Obtain the mean TSC frequency by decreasing the nominal TSC frequency with
+ * TSC_FACTOR as documented in the SNP Firmware ABI specification:
+ *
+ * GUEST_TSC_FREQ * (1 - (TSC_FACTOR * 0.00001))
+ *
+ * which is equivalent to:
+ *
+ * GUEST_TSC_FREQ -= (GUEST_TSC_FREQ * TSC_FACTOR) / 100000;
+ */
+#define SNP_SCALE_TSC_FREQ(freq, factor) ((freq) - (freq) * (factor) / 100000)
+
 struct snp_guest_req {
	void *req_buf;
	size_t req_sz;
@@ -231,6 +243,7 @@ struct snp_guest_req {
	size_t resp_sz;
 
	u64 exit_code;
+	u64 exitinfo2;
	unsigned int vmpck_id;
	u8 msg_version;
	u8 msg_type;
@@ -282,8 +295,11 @@ struct snp_secrets_page {
	u8 svsm_guest_vmpl;
	u8 rsvd3[3];
 
+	/* The percentage decrease from nominal to mean TSC frequency. */
+	u32 tsc_factor;
+
	/* Remainder of page */
-	u8 rsvd4[3744];
+	u8 rsvd4[3740];
 } __packed;
 
 struct snp_msg_desc {
@@ -445,7 +461,7 @@ static __always_inline void sev_es_nmi_complete(void)
	    cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		__sev_es_nmi_complete();
 }
-extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
+extern int __init sev_es_efi_map_ghcbs_cas(pgd_t *pgd);
 extern void sev_enable(struct boot_params *bp);
 
 /*
@@ -486,8 +502,6 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
	return rc;
 }
 
-struct snp_guest_request_ioctl;
-
 void setup_ghcb(void);
 void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
				  unsigned long npages);
@@ -513,8 +527,7 @@ void snp_kexec_begin(void);
 int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id);
 struct snp_msg_desc *snp_msg_alloc(void);
 void snp_msg_free(struct snp_msg_desc *mdesc);
-int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req,
-			   struct snp_guest_request_ioctl *rio);
+int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req);
 
 int snp_svsm_vtpm_send_command(u8 *buffer);
 
@@ -556,7 +569,7 @@ static inline void sev_es_ist_enter(struct pt_regs *regs) { }
 static inline void sev_es_ist_exit(void) { }
 static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
 static inline void sev_es_nmi_complete(void) { }
-static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
+static inline int sev_es_efi_map_ghcbs_cas(pgd_t *pgd) { return 0; }
 static inline void sev_enable(struct boot_params *bp) { }
 static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; }
 static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; }
@@ -587,8 +600,8 @@ static inline void snp_kexec_begin(void) { }
 static inline int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id) { return -1; }
 static inline struct snp_msg_desc *snp_msg_alloc(void) { return NULL; }
 static inline void snp_msg_free(struct snp_msg_desc *mdesc) { }
-static inline int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req,
-					 struct snp_guest_request_ioctl *rio) { return -ENODEV; }
+static inline int snp_send_guest_request(struct snp_msg_desc *mdesc,
					 struct snp_guest_req *req) { return -ENODEV; }
 static inline int snp_svsm_vtpm_send_command(u8 *buffer) { return -ENODEV; }
 static inline void __init snp_secure_tsc_prepare(void) { }
 static inline void __init snp_secure_tsc_init(void) { }
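The SNP_SCALE_TSC_FREQ() macro added above is the integer form of the ABI formula GUEST_TSC_FREQ * (1 - TSC_FACTOR * 0.00001). A worked example with hypothetical numbers (a 2 GHz nominal frequency and tsc_factor = 5000, i.e. a 5% decrease; not part of the patch):

	#include <inttypes.h>
	#include <stdio.h>

	/* Same integer arithmetic as the sev.h hunk above. */
	#define SNP_SCALE_TSC_FREQ(freq, factor) ((freq) - (freq) * (factor) / 100000)

	int main(void)
	{
		uint64_t nominal = 2000000000ULL; /* hypothetical GUEST_TSC_FREQ, 2 GHz */
		uint64_t factor  = 5000;          /* hypothetical TSC_FACTOR, 5% */

		/* 2e9 - (2e9 * 5000) / 100000 = 2e9 - 1e8 = 1.9e9 */
		printf("mean TSC freq = %" PRIu64 " Hz\n",
		       SNP_SCALE_TSC_FREQ(nominal, factor));
		return 0;
	}

Note that multiplying before dividing preserves precision but requires freq * factor to fit in 64 bits, which holds comfortably for realistic TSC frequencies.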
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index d8525e6ef50a..8bc074c8d7c6 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -72,6 +72,7 @@
 #define TDVMCALL_MAP_GPA		0x10001
 #define TDVMCALL_GET_QUOTE		0x10002
 #define TDVMCALL_REPORT_FATAL_ERROR	0x10003
+#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT	0x10004ULL
 
 /*
  * TDG.VP.VMCALL Status Codes (returned in R10)
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 0c1c68039d6f..22bfebe6776d 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -112,7 +112,10 @@ void __noreturn hlt_play_dead(void);
 void native_play_dead(void);
 void play_dead_common(void);
 void wbinvd_on_cpu(int cpu);
-int wbinvd_on_all_cpus(void);
+void wbinvd_on_all_cpus(void);
+void wbinvd_on_cpus_mask(struct cpumask *cpus);
+void wbnoinvd_on_all_cpus(void);
+void wbnoinvd_on_cpus_mask(struct cpumask *cpus);
 
 void smp_kick_mwait_play_dead(void);
 void __noreturn mwait_play_dead(unsigned int eax_hint);
@@ -148,10 +151,24 @@ static inline struct cpumask *cpu_l2c_shared_mask(int cpu)
 #else /* !CONFIG_SMP */
 #define wbinvd_on_cpu(cpu)	wbinvd()
-static inline int wbinvd_on_all_cpus(void)
+static inline void wbinvd_on_all_cpus(void)
 {
	wbinvd();
-	return 0;
+}
+
+static inline void wbinvd_on_cpus_mask(struct cpumask *cpus)
+{
+	wbinvd();
+}
+
+static inline void wbnoinvd_on_all_cpus(void)
+{
+	wbnoinvd();
+}
+
+static inline void wbnoinvd_on_cpus_mask(struct cpumask *cpus)
+{
+	wbnoinvd();
 }
 
 static inline struct cpumask *cpu_llc_shared_mask(int cpu)
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index ecda17efa042..fde2bd7af19e 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -104,9 +104,36 @@ static inline void wrpkru(u32 pkru)
 }
 #endif
 
+/*
+ * Write back all modified lines in all levels of cache associated with this
+ * logical processor to main memory, and then invalidate all caches. Depending
+ * on the micro-architecture, WBINVD (and WBNOINVD below) may or may not affect
+ * lower level caches associated with another logical processor that shares any
+ * level of this processor's cache hierarchy.
+ */
 static __always_inline void wbinvd(void)
 {
-	asm volatile("wbinvd": : :"memory");
+	asm volatile("wbinvd" : : : "memory");
+}
+
+/* Instruction encoding provided for binutils backwards compatibility. */
+#define ASM_WBNOINVD _ASM_BYTES(0xf3,0x0f,0x09)
+
+/*
+ * Write back all modified lines in all levels of cache associated with this
+ * logical processor to main memory, but do NOT explicitly invalidate caches,
+ * i.e. leave all/most cache lines in the hierarchy in non-modified state.
+ */
+static __always_inline void wbnoinvd(void)
+{
+	/*
+	 * Explicitly encode WBINVD if X86_FEATURE_WBNOINVD is unavailable even
+	 * though WBNOINVD is backwards compatible (it's simply WBINVD with an
+	 * ignored REP prefix), to guarantee that WBNOINVD isn't used if it
+	 * needs to be avoided for any reason. For all supported usage in the
+	 * kernel, WBINVD is functionally a superset of WBNOINVD.
+	 */
+	alternative("wbinvd", ASM_WBNOINVD, X86_FEATURE_WBNOINVD);
 }
 
 static inline unsigned long __read_cr4(void)
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index ad954a1a6656..ffc27f676243 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -252,16 +252,21 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 #define AVIC_LOGICAL_ID_ENTRY_VALID_BIT		31
 #define AVIC_LOGICAL_ID_ENTRY_VALID_MASK	(1 << 31)
 
+/*
+ * GA_LOG_INTR is a synthetic flag that's never propagated to hardware-visible
+ * tables. GA_LOG_INTR is set if the vCPU needs device posted IRQs to generate
+ * GA log interrupts to wake the vCPU (because it's blocking or about to block).
+ */
+#define AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR			BIT_ULL(61)
+
 #define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK	GENMASK_ULL(11, 0)
-#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK	(0xFFFFFFFFFFULL << 12)
+#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK	GENMASK_ULL(51, 12)
 #define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK		(1ULL << 62)
 #define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK		(1ULL << 63)
 #define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK		(0xFFULL)
 
 #define AVIC_DOORBELL_PHYSICAL_ID_MASK			GENMASK_ULL(11, 0)
 
-#define VMCB_AVIC_APIC_BAR_MASK				0xFFFFFFFFFF000ULL
-
 #define AVIC_UNACCEL_ACCESS_WRITE_MASK		1
 #define AVIC_UNACCEL_ACCESS_OFFSET_MASK		0xFF0
 #define AVIC_UNACCEL_ACCESS_VECTOR_MASK		0xFFFFFFFF
@@ -290,8 +295,6 @@ enum avic_ipi_failure_cause {
 static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID);
 static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
 
-#define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)
-
 #define SVM_SEV_FEAT_SNP_ACTIVE				BIT(0)
 #define SVM_SEV_FEAT_RESTRICTED_INJECTION		BIT(3)
 #define SVM_SEV_FEAT_ALTERNATE_INJECTION		BIT(4)
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 6f3499507c5e..0f15d683817d 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -965,7 +965,13 @@ struct kvm_tdx_cmd {
 struct kvm_tdx_capabilities {
	__u64 supported_attrs;
	__u64 supported_xfam;
-	__u64 reserved[254];
+
+	__u64 kernel_tdvmcallinfo_1_r11;
+	__u64 user_tdvmcallinfo_1_r11;
+	__u64 kernel_tdvmcallinfo_1_r12;
+	__u64 user_tdvmcallinfo_1_r12;
+
+	__u64 reserved[250];
 
	/* Configurable CPUID bits for userspace */
	struct kvm_cpuid2 cpuid;
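One aside on the svm.h hunk above: with the backing-page field rewritten as GENMASK_ULL(51, 12), a physical APIC ID table entry is just an OR of field masks. An illustrative user-space composition (GENMASK_ULL is reimplemented and the field values are hypothetical; the real table setup lives in KVM's AVIC code, and this sketch is not part of the patch):

	#include <inttypes.h>
	#include <stdio.h>

	/* Stand-in for the kernel's GENMASK_ULL(h, l). */
	#define GENMASK_ULL(h, l) ((~0ULL << (l)) & (~0ULL >> (63 - (h))))

	#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK	GENMASK_ULL(11, 0)
	#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK	GENMASK_ULL(51, 12)
	#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK		(1ULL << 62)
	#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK		(1ULL << 63)

	int main(void)
	{
		uint64_t backing_page_pa = 0x12345000ULL; /* hypothetical 4K-aligned PA */
		uint64_t host_cpu_id     = 3;             /* hypothetical host APIC ID */

		uint64_t entry =
			(host_cpu_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK) |
			(backing_page_pa & AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
			AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK |
			AVIC_PHYSICAL_ID_ENTRY_VALID_MASK;

		printf("entry = %#" PRIx64 "\n", entry); /* 0xc000000012345003 */
		return 0;
	}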