diff options
Diffstat (limited to 'arch/x86/hyperv/ivm.c')
| -rw-r--r-- | arch/x86/hyperv/ivm.c | 251 |
1 files changed, 241 insertions, 10 deletions
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 60fc3ed72830..651771534cae 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -7,9 +7,10 @@ */ #include <linux/bitfield.h> -#include <linux/hyperv.h> #include <linux/types.h> #include <linux/slab.h> +#include <linux/cpu.h> +#include <linux/export.h> #include <asm/svm.h> #include <asm/sev.h> #include <asm/io.h> @@ -23,6 +24,7 @@ #include <asm/realmode.h> #include <asm/e820/api.h> #include <asm/desc.h> +#include <asm/msr.h> #include <uapi/asm/vmx.h> #ifdef CONFIG_AMD_MEM_ENCRYPT @@ -111,12 +113,12 @@ u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size) static inline u64 rd_ghcb_msr(void) { - return __rdmsr(MSR_AMD64_SEV_ES_GHCB); + return native_rdmsrq(MSR_AMD64_SEV_ES_GHCB); } static inline void wr_ghcb_msr(u64 val) { - native_wrmsrl(MSR_AMD64_SEV_ES_GHCB, val); + native_wrmsrq(MSR_AMD64_SEV_ES_GHCB, val); } static enum es_result hv_ghcb_hv_call(struct ghcb *ghcb, u64 exit_code, @@ -289,7 +291,7 @@ static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) free_page((unsigned long)vmsa); } -int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) +int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu) { struct sev_es_save_area *vmsa = (struct sev_es_save_area *) __get_free_page(GFP_KERNEL | __GFP_ZERO); @@ -298,10 +300,16 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) u64 ret, retry = 5; struct hv_enable_vp_vtl *start_vp_input; unsigned long flags; + int vp_index; if (!vmsa) return -ENOMEM; + /* Find the Hyper-V VP index which might be not the same as APIC ID */ + vp_index = hv_apicid_to_vp_index(apic_id); + if (vp_index < 0 || vp_index > ms_hyperv.max_vp_index) + return -EINVAL; + native_store_gdt(&gdtr); vmsa->gdtr.base = gdtr.address; @@ -339,7 +347,7 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) vmsa->sev_features = sev_status >> 2; ret = snp_set_vmsa(vmsa, true); - if (!ret) { + if (ret) { pr_err("RMPADJUST(%llx) failed: %llx\n", (u64)vmsa, ret); free_page((u64)vmsa); return ret; @@ -349,7 +357,7 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) start_vp_input = (struct hv_enable_vp_vtl *)ap_start_input_arg; memset(start_vp_input, 0, sizeof(*start_vp_input)); start_vp_input->partition_id = -1; - start_vp_input->vp_index = cpu; + start_vp_input->vp_index = vp_index; start_vp_input->target_vtl.target_vtl = ms_hyperv.vtl; *(u64 *)&start_vp_input->vp_context = __pa(vmsa) | 1; @@ -377,9 +385,23 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) return ret; } +u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2) +{ + u64 hv_status; + + register u64 __r8 asm("r8") = param2; + asm volatile("vmmcall" + : "=a" (hv_status), ASM_CALL_CONSTRAINT, + "+c" (control), "+d" (param1), "+r" (__r8) + : : "cc", "memory", "r9", "r10", "r11"); + + return hv_status; +} + #else static inline void hv_ghcb_msr_write(u64 msr, u64 value) {} static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} +u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; } #endif /* CONFIG_AMD_MEM_ENCRYPT */ #ifdef CONFIG_INTEL_TDX_GUEST @@ -429,6 +451,7 @@ u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2) #else static inline void hv_tdx_msr_write(u64 msr, u64 value) {} static inline void hv_tdx_msr_read(u64 msr, u64 *value) {} +u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; } #endif /* CONFIG_INTEL_TDX_GUEST */ #if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) @@ -455,6 +478,195 @@ void hv_ivm_msr_read(u64 msr, u64 *value) } /* + * Keep track of the PFN regions which were shared with the host. The access + * must be revoked upon kexec/kdump (see hv_ivm_clear_host_access()). + */ +struct hv_enc_pfn_region { + struct list_head list; + u64 pfn; + int count; +}; + +static LIST_HEAD(hv_list_enc); +static DEFINE_RAW_SPINLOCK(hv_list_enc_lock); + +static int hv_list_enc_add(const u64 *pfn_list, int count) +{ + struct hv_enc_pfn_region *ent; + unsigned long flags; + u64 pfn; + int i; + + for (i = 0; i < count; i++) { + pfn = pfn_list[i]; + + raw_spin_lock_irqsave(&hv_list_enc_lock, flags); + /* Check if the PFN already exists in some region first */ + list_for_each_entry(ent, &hv_list_enc, list) { + if ((ent->pfn <= pfn) && (ent->pfn + ent->count - 1 >= pfn)) + /* Nothing to do - pfn is already in the list */ + goto unlock_done; + } + + /* + * Check if the PFN is adjacent to an existing region. Growing + * a region can make it adjacent to another one but merging is + * not (yet) implemented for simplicity. A PFN cannot be added + * to two regions to keep the logic in hv_list_enc_remove() + * correct. + */ + list_for_each_entry(ent, &hv_list_enc, list) { + if (ent->pfn + ent->count == pfn) { + /* Grow existing region up */ + ent->count++; + goto unlock_done; + } else if (pfn + 1 == ent->pfn) { + /* Grow existing region down */ + ent->pfn--; + ent->count++; + goto unlock_done; + } + } + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + + /* No adjacent region found -- create a new one */ + ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL); + if (!ent) + return -ENOMEM; + + ent->pfn = pfn; + ent->count = 1; + + raw_spin_lock_irqsave(&hv_list_enc_lock, flags); + list_add(&ent->list, &hv_list_enc); + +unlock_done: + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + } + + return 0; +} + +static int hv_list_enc_remove(const u64 *pfn_list, int count) +{ + struct hv_enc_pfn_region *ent, *t; + struct hv_enc_pfn_region new_region; + unsigned long flags; + u64 pfn; + int i; + + for (i = 0; i < count; i++) { + pfn = pfn_list[i]; + + raw_spin_lock_irqsave(&hv_list_enc_lock, flags); + list_for_each_entry_safe(ent, t, &hv_list_enc, list) { + if (pfn == ent->pfn + ent->count - 1) { + /* Removing tail pfn */ + ent->count--; + if (!ent->count) { + list_del(&ent->list); + kfree(ent); + } + goto unlock_done; + } else if (pfn == ent->pfn) { + /* Removing head pfn */ + ent->count--; + ent->pfn++; + if (!ent->count) { + list_del(&ent->list); + kfree(ent); + } + goto unlock_done; + } else if (pfn > ent->pfn && pfn < ent->pfn + ent->count - 1) { + /* + * Removing a pfn in the middle. Cut off the tail + * of the existing region and create a template for + * the new one. + */ + new_region.pfn = pfn + 1; + new_region.count = ent->count - (pfn - ent->pfn + 1); + ent->count = pfn - ent->pfn; + goto unlock_split; + } + + } +unlock_done: + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + continue; + +unlock_split: + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + + ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL); + if (!ent) + return -ENOMEM; + + ent->pfn = new_region.pfn; + ent->count = new_region.count; + + raw_spin_lock_irqsave(&hv_list_enc_lock, flags); + list_add(&ent->list, &hv_list_enc); + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + } + + return 0; +} + +/* Stop new private<->shared conversions */ +static void hv_vtom_kexec_begin(void) +{ + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + return; + + /* + * Crash kernel reaches here with interrupts disabled: can't wait for + * conversions to finish. + * + * If race happened, just report and proceed. + */ + if (!set_memory_enc_stop_conversion()) + pr_warn("Failed to stop shared<->private conversions\n"); +} + +static void hv_vtom_kexec_finish(void) +{ + struct hv_gpa_range_for_visibility *input; + struct hv_enc_pfn_region *ent; + unsigned long flags; + u64 hv_status; + int cur, i; + + local_irq_save(flags); + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + + if (unlikely(!input)) + goto out; + + list_for_each_entry(ent, &hv_list_enc, list) { + for (i = 0, cur = 0; i < ent->count; i++) { + input->gpa_page_list[cur] = ent->pfn + i; + cur++; + + if (cur == HV_MAX_MODIFY_GPA_REP_COUNT || i == ent->count - 1) { + input->partition_id = HV_PARTITION_ID_SELF; + input->host_visibility = VMBUS_PAGE_NOT_VISIBLE; + input->reserved0 = 0; + input->reserved1 = 0; + hv_status = hv_do_rep_hypercall( + HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, + cur, 0, input, NULL); + WARN_ON_ONCE(!hv_result_success(hv_status)); + cur = 0; + } + } + + } + +out: + local_irq_restore(flags); +} + +/* * hv_mark_gpa_visibility - Set pages visible to host via hvcall. * * In Isolation VM, all guest memory is encrypted from host and guest @@ -465,9 +677,9 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], enum hv_mem_host_visibility visibility) { struct hv_gpa_range_for_visibility *input; - u16 pages_processed; u64 hv_status; unsigned long flags; + int ret; /* no-op if partition isolation is not enabled */ if (!hv_is_isolation_supported()) @@ -479,6 +691,13 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], return -EINVAL; } + if (visibility == VMBUS_PAGE_NOT_VISIBLE) + ret = hv_list_enc_remove(pfn, count); + else + ret = hv_list_enc_add(pfn, count); + if (ret) + return ret; + local_irq_save(flags); input = *this_cpu_ptr(hyperv_pcpu_input_arg); @@ -494,13 +713,23 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], memcpy((void *)input->gpa_page_list, pfn, count * sizeof(*pfn)); hv_status = hv_do_rep_hypercall( HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, count, - 0, input, &pages_processed); + 0, input, NULL); local_irq_restore(flags); if (hv_result_success(hv_status)) return 0; + + if (visibility == VMBUS_PAGE_NOT_VISIBLE) + ret = hv_list_enc_add(pfn, count); else - return -EFAULT; + ret = hv_list_enc_remove(pfn, count); + /* + * There's no good way to recover from -ENOMEM here, the accounting is + * wrong either way. + */ + WARN_ON_ONCE(ret); + + return -EFAULT; } /* @@ -662,9 +891,11 @@ void __init hv_vtom_init(void) x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required; x86_platform.guest.enc_status_change_prepare = hv_vtom_clear_present; x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility; + x86_platform.guest.enc_kexec_begin = hv_vtom_kexec_begin; + x86_platform.guest.enc_kexec_finish = hv_vtom_kexec_finish; /* Set WB as the default cache mode. */ - mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK); + guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK); } #endif /* defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) */ |
