diff options
Diffstat (limited to 'arch/x86/kernel/cpu/mshyperv.c')
-rw-r--r-- | arch/x86/kernel/cpu/mshyperv.c | 210 |
1 files changed, 137 insertions, 73 deletions
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 01fa06dd06b6..f285757618fc 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -16,11 +16,10 @@ #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/kexec.h> -#include <linux/i8253.h> #include <linux/random.h> #include <asm/processor.h> #include <asm/hypervisor.h> -#include <asm/hyperv-tlfs.h> +#include <hyperv/hvhdk.h> #include <asm/mshyperv.h> #include <asm/desc.h> #include <asm/idtentry.h> @@ -45,70 +44,70 @@ bool hyperv_paravisor_present __ro_after_init; EXPORT_SYMBOL_GPL(hyperv_paravisor_present); #if IS_ENABLED(CONFIG_HYPERV) -static inline unsigned int hv_get_nested_reg(unsigned int reg) +static inline unsigned int hv_get_nested_msr(unsigned int reg) { - if (hv_is_sint_reg(reg)) - return reg - HV_REGISTER_SINT0 + HV_REGISTER_NESTED_SINT0; + if (hv_is_sint_msr(reg)) + return reg - HV_X64_MSR_SINT0 + HV_X64_MSR_NESTED_SINT0; switch (reg) { - case HV_REGISTER_SIMP: - return HV_REGISTER_NESTED_SIMP; - case HV_REGISTER_SIEFP: - return HV_REGISTER_NESTED_SIEFP; - case HV_REGISTER_SVERSION: - return HV_REGISTER_NESTED_SVERSION; - case HV_REGISTER_SCONTROL: - return HV_REGISTER_NESTED_SCONTROL; - case HV_REGISTER_EOM: - return HV_REGISTER_NESTED_EOM; + case HV_X64_MSR_SIMP: + return HV_X64_MSR_NESTED_SIMP; + case HV_X64_MSR_SIEFP: + return HV_X64_MSR_NESTED_SIEFP; + case HV_X64_MSR_SVERSION: + return HV_X64_MSR_NESTED_SVERSION; + case HV_X64_MSR_SCONTROL: + return HV_X64_MSR_NESTED_SCONTROL; + case HV_X64_MSR_EOM: + return HV_X64_MSR_NESTED_EOM; default: return reg; } } -u64 hv_get_non_nested_register(unsigned int reg) +u64 hv_get_non_nested_msr(unsigned int reg) { u64 value; - if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present) + if (hv_is_synic_msr(reg) && ms_hyperv.paravisor_present) hv_ivm_msr_read(reg, &value); else rdmsrl(reg, value); return value; } -EXPORT_SYMBOL_GPL(hv_get_non_nested_register); +EXPORT_SYMBOL_GPL(hv_get_non_nested_msr); -void hv_set_non_nested_register(unsigned int reg, u64 value) +void hv_set_non_nested_msr(unsigned int reg, u64 value) { - if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present) { + if (hv_is_synic_msr(reg) && ms_hyperv.paravisor_present) { hv_ivm_msr_write(reg, value); /* Write proxy bit via wrmsl instruction */ - if (hv_is_sint_reg(reg)) + if (hv_is_sint_msr(reg)) wrmsrl(reg, value | 1 << 20); } else { wrmsrl(reg, value); } } -EXPORT_SYMBOL_GPL(hv_set_non_nested_register); +EXPORT_SYMBOL_GPL(hv_set_non_nested_msr); -u64 hv_get_register(unsigned int reg) +u64 hv_get_msr(unsigned int reg) { if (hv_nested) - reg = hv_get_nested_reg(reg); + reg = hv_get_nested_msr(reg); - return hv_get_non_nested_register(reg); + return hv_get_non_nested_msr(reg); } -EXPORT_SYMBOL_GPL(hv_get_register); +EXPORT_SYMBOL_GPL(hv_get_msr); -void hv_set_register(unsigned int reg, u64 value) +void hv_set_msr(unsigned int reg, u64 value) { if (hv_nested) - reg = hv_get_nested_reg(reg); + reg = hv_get_nested_msr(reg); - hv_set_non_nested_register(reg, value); + hv_set_non_nested_msr(reg, value); } -EXPORT_SYMBOL_GPL(hv_set_register); +EXPORT_SYMBOL_GPL(hv_set_msr); static void (*vmbus_handler)(void); static void (*hv_stimer0_handler)(void); @@ -199,8 +198,8 @@ static void hv_machine_shutdown(void) * Call hv_cpu_die() on all the CPUs, otherwise later the hypervisor * corrupts the old VP Assist Pages and can crash the kexec kernel. */ - if (kexec_in_progress && hyperv_init_cpuhp > 0) - cpuhp_remove_state(hyperv_init_cpuhp); + if (kexec_in_progress) + cpuhp_remove_state(CPUHP_AP_HYPERV_ONLINE); /* The function calls stop_other_cpus(). */ native_machine_shutdown(); @@ -209,7 +208,9 @@ static void hv_machine_shutdown(void) if (kexec_in_progress) hyperv_cleanup(); } +#endif /* CONFIG_KEXEC_CORE */ +#ifdef CONFIG_CRASH_DUMP static void hv_machine_crash_shutdown(struct pt_regs *regs) { if (hv_crash_handler) @@ -221,7 +222,64 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs) /* Disable the hypercall page when there is only 1 active CPU. */ hyperv_cleanup(); } -#endif /* CONFIG_KEXEC_CORE */ +#endif /* CONFIG_CRASH_DUMP */ + +static u64 hv_ref_counter_at_suspend; +static void (*old_save_sched_clock_state)(void); +static void (*old_restore_sched_clock_state)(void); + +/* + * Hyper-V clock counter resets during hibernation. Save and restore clock + * offset during suspend/resume, while also considering the time passed + * before suspend. This is to make sure that sched_clock using hv tsc page + * based clocksource, proceeds from where it left off during suspend and + * it shows correct time for the timestamps of kernel messages after resume. + */ +static void save_hv_clock_tsc_state(void) +{ + hv_ref_counter_at_suspend = hv_read_reference_counter(); +} + +static void restore_hv_clock_tsc_state(void) +{ + /* + * Adjust the offsets used by hv tsc clocksource to + * account for the time spent before hibernation. + * adjusted value = reference counter (time) at suspend + * - reference counter (time) now. + */ + hv_adj_sched_clock_offset(hv_ref_counter_at_suspend - hv_read_reference_counter()); +} + +/* + * Functions to override save_sched_clock_state and restore_sched_clock_state + * functions of x86_platform. The Hyper-V clock counter is reset during + * suspend-resume and the offset used to measure time needs to be + * corrected, post resume. + */ +static void hv_save_sched_clock_state(void) +{ + old_save_sched_clock_state(); + save_hv_clock_tsc_state(); +} + +static void hv_restore_sched_clock_state(void) +{ + restore_hv_clock_tsc_state(); + old_restore_sched_clock_state(); +} + +static void __init x86_setup_ops_for_tsc_pg_clock(void) +{ + if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) + return; + + old_save_sched_clock_state = x86_platform.save_sched_clock_state; + x86_platform.save_sched_clock_state = hv_save_sched_clock_state; + + old_restore_sched_clock_state = x86_platform.restore_sched_clock_state; + x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state; +} #endif /* CONFIG_HYPERV */ static uint32_t __init ms_hyperv_platform(void) @@ -350,13 +408,24 @@ static void __init reduced_hw_init(void) x86_init.irqs.pre_vector_init = x86_init_noop; } +int hv_get_hypervisor_version(union hv_hypervisor_version_info *info) +{ + unsigned int hv_max_functions; + + hv_max_functions = cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS); + if (hv_max_functions < HYPERV_CPUID_VERSION) { + pr_err("%s: Could not detect Hyper-V version\n", __func__); + return -ENODEV; + } + + cpuid(HYPERV_CPUID_VERSION, &info->eax, &info->ebx, &info->ecx, &info->edx); + + return 0; +} + static void __init ms_hyperv_init_platform(void) { int hv_max_functions_eax; - int hv_host_info_eax; - int hv_host_info_ebx; - int hv_host_info_ecx; - int hv_host_info_edx; #ifdef CONFIG_PARAVIRT pv_info.name = "Hyper-V"; @@ -407,25 +476,11 @@ static void __init ms_hyperv_init_platform(void) pr_info("Hyper-V: running on a nested hypervisor\n"); } - /* - * Extract host information. - */ - if (hv_max_functions_eax >= HYPERV_CPUID_VERSION) { - hv_host_info_eax = cpuid_eax(HYPERV_CPUID_VERSION); - hv_host_info_ebx = cpuid_ebx(HYPERV_CPUID_VERSION); - hv_host_info_ecx = cpuid_ecx(HYPERV_CPUID_VERSION); - hv_host_info_edx = cpuid_edx(HYPERV_CPUID_VERSION); - - pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n", - hv_host_info_ebx >> 16, hv_host_info_ebx & 0xFFFF, - hv_host_info_eax, hv_host_info_edx & 0xFFFFFF, - hv_host_info_ecx, hv_host_info_edx >> 24); - } - if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS && ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { x86_platform.calibrate_tsc = hv_get_tsc_khz; x86_platform.calibrate_cpu = hv_get_tsc_khz; + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); } if (ms_hyperv.priv_high & HV_ISOLATION) { @@ -451,10 +506,24 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED; if (!ms_hyperv.paravisor_present) { - /* To be supported: more work is required. */ + /* + * Mark the Hyper-V TSC page feature as disabled + * in a TDX VM without paravisor so that the + * Invariant TSC, which is a better clocksource + * anyway, is used instead. + */ ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE; - /* HV_REGISTER_CRASH_CTL is unsupported. */ + /* + * The Invariant TSC is expected to be available + * in a TDX VM without paravisor, but if not, + * print a warning message. The slower Hyper-V MSR-based + * Ref Counter should end up being the clocksource. + */ + if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)) + pr_warn("Hyper-V: Invariant TSC is unavailable\n"); + + /* HV_MSR_CRASH_CTL is unsupported. */ ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; /* Don't trust Hyper-V's TLB-flushing hypercalls. */ @@ -495,10 +564,14 @@ static void __init ms_hyperv_init_platform(void) no_timer_check = 1; #endif -#if IS_ENABLED(CONFIG_HYPERV) && defined(CONFIG_KEXEC_CORE) +#if IS_ENABLED(CONFIG_HYPERV) +#if defined(CONFIG_KEXEC_CORE) machine_ops.shutdown = hv_machine_shutdown; +#endif +#if defined(CONFIG_CRASH_DUMP) machine_ops.crash_shutdown = hv_machine_crash_shutdown; #endif +#endif if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { /* * Writing to synthetic MSR 0x40000118 updates/changes the @@ -520,16 +593,6 @@ static void __init ms_hyperv_init_platform(void) if (efi_enabled(EFI_BOOT)) x86_platform.get_nmi_reason = hv_get_nmi_reason; - /* - * Hyper-V VMs have a PIT emulation quirk such that zeroing the - * counter register during PIT shutdown restarts the PIT. So it - * continues to interrupt @18.2 HZ. Setting i8253_clear_counter - * to false tells pit_shutdown() not to zero the counter so that - * the PIT really is shutdown. Generation 2 VMs don't have a PIT, - * and setting this value has no effect. - */ - i8253_clear_counter_on_shutdown = false; - #if IS_ENABLED(CONFIG_HYPERV) if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) || ms_hyperv.paravisor_present) @@ -539,19 +602,18 @@ static void __init ms_hyperv_init_platform(void) */ x86_platform.apic_post_init = hyperv_init; hyperv_setup_mmu_ops(); - /* Setup the IDT for hypervisor callback */ - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_hyperv_callback); - /* Setup the IDT for reenlightenment notifications */ + /* Install system interrupt handler for hypervisor callback */ + sysvec_install(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback); + + /* Install system interrupt handler for reenlightenment notifications */ if (ms_hyperv.features & HV_ACCESS_REENLIGHTENMENT) { - alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR, - asm_sysvec_hyperv_reenlightenment); + sysvec_install(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment); } - /* Setup the IDT for stimer0 */ + /* Install system interrupt handler for stimer0 */ if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) { - alloc_intr_gate(HYPERV_STIMER0_VECTOR, - asm_sysvec_hyperv_stimer0); + sysvec_install(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0); } # ifdef CONFIG_SMP @@ -574,6 +636,7 @@ static void __init ms_hyperv_init_platform(void) /* Register Hyper-V specific clocksource */ hv_init_clocksource(); + x86_setup_ops_for_tsc_pg_clock(); hv_vtl_init_platform(); #endif /* @@ -643,6 +706,7 @@ const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { .init.x2apic_available = ms_hyperv_x2apic_available, .init.msi_ext_dest_id = ms_hyperv_msi_ext_dest_id, .init.init_platform = ms_hyperv_init_platform, + .init.guest_late_init = ms_hyperv_late_init, #ifdef CONFIG_AMD_MEM_ENCRYPT .runtime.sev_es_hcall_prepare = hv_sev_es_hcall_prepare, .runtime.sev_es_hcall_finish = hv_sev_es_hcall_finish, |