From 9c8434238041e18d53fd6911826973b37656a8d1 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Mon, 20 Mar 2023 00:47:38 -0700 Subject: Drivers: hv: vmbus: Convert acpi_device to more generic platform_device VMBus driver code currently has direct dependency on ACPI and struct acpi_device. As a staging step toward optionally configuring based on Devicetree instead of ACPI, use a more generic platform device to reduce the dependency on ACPI where possible, though the dependency on ACPI is not completely removed. Also rename the function vmbus_acpi_remove() to the more generic vmbus_mmio_remove(). Signed-off-by: Saurabh Sengar Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/1679298460-11855-4-git-send-email-ssengar@linux.microsoft.com Signed-off-by: Wei Liu --- drivers/hv/vmbus_drv.c | 58 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 23 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index d24dd65b33d4..73497157a23a 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -44,7 +45,7 @@ struct vmbus_dynid { struct hv_vmbus_device_id id; }; -static struct acpi_device *hv_acpi_dev; +static struct device *hv_dev; static int hyperv_cpuhp_online; @@ -143,7 +144,7 @@ static DEFINE_MUTEX(hyperv_mmio_lock); static int vmbus_exists(void) { - if (hv_acpi_dev == NULL) + if (hv_dev == NULL) return -ENODEV; return 0; @@ -932,7 +933,7 @@ static int vmbus_dma_configure(struct device *child_device) * On x86/x64 coherence is assumed and these calls have no effect. */ hv_setup_dma_ops(child_device, - device_get_dma_attr(&hv_acpi_dev->dev) == DEV_DMA_COHERENT); + device_get_dma_attr(hv_dev) == DEV_DMA_COHERENT); return 0; } @@ -2090,7 +2091,7 @@ int vmbus_device_register(struct hv_device *child_device_obj) &child_device_obj->channel->offermsg.offer.if_instance); child_device_obj->device.bus = &hv_bus; - child_device_obj->device.parent = &hv_acpi_dev->dev; + child_device_obj->device.parent = hv_dev; child_device_obj->device.release = vmbus_device_release; child_device_obj->device.dma_parms = &child_device_obj->dma_parms; @@ -2262,7 +2263,7 @@ static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx) return AE_OK; } -static void vmbus_acpi_remove(struct acpi_device *device) +static void vmbus_mmio_remove(void) { struct resource *cur_res; struct resource *next_res; @@ -2441,13 +2442,14 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size) } EXPORT_SYMBOL_GPL(vmbus_free_mmio); -static int vmbus_acpi_add(struct acpi_device *device) +static int vmbus_acpi_add(struct platform_device *pdev) { acpi_status result; int ret_val = -ENODEV; struct acpi_device *ancestor; + struct acpi_device *device = ACPI_COMPANION(&pdev->dev); - hv_acpi_dev = device; + hv_dev = &device->dev; /* * Older versions of Hyper-V for ARM64 fail to include the _CCA @@ -2489,10 +2491,21 @@ static int vmbus_acpi_add(struct acpi_device *device) acpi_walk_err: if (ret_val) - vmbus_acpi_remove(device); + vmbus_mmio_remove(); return ret_val; } +static int vmbus_platform_driver_probe(struct platform_device *pdev) +{ + return vmbus_acpi_add(pdev); +} + +static int vmbus_platform_driver_remove(struct platform_device *pdev) +{ + vmbus_mmio_remove(); + return 0; +} + #ifdef CONFIG_PM_SLEEP static int vmbus_bus_suspend(struct device *dev) { @@ -2658,15 +2671,15 @@ static const struct dev_pm_ops vmbus_bus_pm = { .restore_noirq = vmbus_bus_resume }; -static struct acpi_driver vmbus_acpi_driver = { - .name = "vmbus", - .ids = vmbus_acpi_device_ids, - .ops = { - .add = vmbus_acpi_add, - .remove = vmbus_acpi_remove, - }, - .drv.pm = &vmbus_bus_pm, - .drv.probe_type = PROBE_FORCE_SYNCHRONOUS, +static struct platform_driver vmbus_platform_driver = { + .probe = vmbus_platform_driver_probe, + .remove = vmbus_platform_driver_remove, + .driver = { + .name = "vmbus", + .acpi_match_table = ACPI_PTR(vmbus_acpi_device_ids), + .pm = &vmbus_bus_pm, + .probe_type = PROBE_FORCE_SYNCHRONOUS, + } }; static void hv_kexec_handler(void) @@ -2750,12 +2763,11 @@ static int __init hv_acpi_init(void) /* * Get ACPI resources first. */ - ret = acpi_bus_register_driver(&vmbus_acpi_driver); - + ret = platform_driver_register(&vmbus_platform_driver); if (ret) return ret; - if (!hv_acpi_dev) { + if (!hv_dev) { ret = -ENODEV; goto cleanup; } @@ -2785,8 +2797,8 @@ static int __init hv_acpi_init(void) return 0; cleanup: - acpi_bus_unregister_driver(&vmbus_acpi_driver); - hv_acpi_dev = NULL; + platform_driver_unregister(&vmbus_platform_driver); + hv_dev = NULL; return ret; } @@ -2839,7 +2851,7 @@ static void __exit vmbus_exit(void) cpuhp_remove_state(hyperv_cpuhp_online); hv_synic_free(); - acpi_bus_unregister_driver(&vmbus_acpi_driver); + platform_driver_unregister(&vmbus_platform_driver); } -- cgit From f83705a51275ed29117d46e1d68e8b16dcb40507 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Mon, 20 Mar 2023 00:47:40 -0700 Subject: Driver: VMBus: Add Devicetree support Update the driver to support Devicetree boot as well along with ACPI. At present the Devicetree parsing only provides the mmio region info and is not the exact copy of ACPI parsing. This is sufficient to cater all the current Devicetree usecases for VMBus. Currently Devicetree is supported only for x86 systems. Signed-off-by: Saurabh Sengar Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/1679298460-11855-6-git-send-email-ssengar@linux.microsoft.com Signed-off-by: Wei Liu --- drivers/hv/Kconfig | 5 ++-- drivers/hv/vmbus_drv.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 65 insertions(+), 6 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 0747a8f1fcee..47132b30b7ee 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -4,11 +4,12 @@ menu "Microsoft Hyper-V guest support" config HYPERV tristate "Microsoft Hyper-V client drivers" - depends on ACPI && ((X86 && X86_LOCAL_APIC && HYPERVISOR_GUEST) \ - || (ARM64 && !CPU_BIG_ENDIAN)) + depends on (X86 && X86_LOCAL_APIC && HYPERVISOR_GUEST) \ + || (ACPI && ARM64 && !CPU_BIG_ENDIAN) select PARAVIRT select X86_HV_CALLBACK_VECTOR if X86 select VMAP_PFN + select OF_EARLY_FLATTREE if OF help Select this option to run Linux as a Hyper-V client operating system. diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 73497157a23a..20b4be426e0c 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -2152,7 +2153,7 @@ void vmbus_device_unregister(struct hv_device *device_obj) device_unregister(&device_obj->device); } - +#ifdef CONFIG_ACPI /* * VMBUS is an acpi enumerated device. Get the information we * need from DSDT. @@ -2262,6 +2263,7 @@ static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx) return AE_OK; } +#endif static void vmbus_mmio_remove(void) { @@ -2282,7 +2284,7 @@ static void vmbus_mmio_remove(void) } } -static void vmbus_reserve_fb(void) +static void __maybe_unused vmbus_reserve_fb(void) { resource_size_t start = 0, size; struct pci_dev *pdev; @@ -2442,6 +2444,7 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size) } EXPORT_SYMBOL_GPL(vmbus_free_mmio); +#ifdef CONFIG_ACPI static int vmbus_acpi_add(struct platform_device *pdev) { acpi_status result; @@ -2494,10 +2497,54 @@ acpi_walk_err: vmbus_mmio_remove(); return ret_val; } +#else +static int vmbus_acpi_add(struct platform_device *pdev) +{ + return 0; +} +#endif + +static int vmbus_device_add(struct platform_device *pdev) +{ + struct resource **cur_res = &hyperv_mmio; + struct of_range range; + struct of_range_parser parser; + struct device_node *np = pdev->dev.of_node; + int ret; + + hv_dev = &pdev->dev; + + ret = of_range_parser_init(&parser, np); + if (ret) + return ret; + + for_each_of_range(&parser, &range) { + struct resource *res; + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) { + vmbus_mmio_remove(); + return -ENOMEM; + } + + res->name = "hyperv mmio"; + res->flags = range.flags; + res->start = range.cpu_addr; + res->end = range.cpu_addr + range.size; + + *cur_res = res; + cur_res = &res->sibling; + } + + return ret; +} static int vmbus_platform_driver_probe(struct platform_device *pdev) { - return vmbus_acpi_add(pdev); + if (acpi_disabled) + return vmbus_device_add(pdev); + else + return vmbus_acpi_add(pdev); } static int vmbus_platform_driver_remove(struct platform_device *pdev) @@ -2643,7 +2690,17 @@ static int vmbus_bus_resume(struct device *dev) #define vmbus_bus_resume NULL #endif /* CONFIG_PM_SLEEP */ -static const struct acpi_device_id vmbus_acpi_device_ids[] = { +static const __maybe_unused struct of_device_id vmbus_of_match[] = { + { + .compatible = "microsoft,vmbus", + }, + { + /* sentinel */ + }, +}; +MODULE_DEVICE_TABLE(of, vmbus_of_match); + +static const __maybe_unused struct acpi_device_id vmbus_acpi_device_ids[] = { {"VMBUS", 0}, {"VMBus", 0}, {"", 0}, @@ -2677,6 +2734,7 @@ static struct platform_driver vmbus_platform_driver = { .driver = { .name = "vmbus", .acpi_match_table = ACPI_PTR(vmbus_acpi_device_ids), + .of_match_table = of_match_ptr(vmbus_of_match), .pm = &vmbus_bus_pm, .probe_type = PROBE_FORCE_SYNCHRONOUS, } -- cgit From a5ddb74588213c31ce993a8e9a09d1ffdc11a142 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 26 Mar 2023 06:52:03 -0700 Subject: Drivers: hv: vmbus: Remove second mapping of VMBus monitor pages With changes to how Hyper-V guest VMs flip memory between private (encrypted) and shared (decrypted), creating a second kernel virtual mapping for shared memory is no longer necessary. Everything needed for the transition to shared is handled by set_memory_decrypted(). As such, remove the code to create and manage the second mapping for VMBus monitor pages. Because set_memory_decrypted() and set_memory_encrypted() are no-ops in normal VMs, it's not even necessary to test for being in a Confidential VM (a.k.a., "Isolation VM"). Signed-off-by: Michael Kelley Reviewed-by: Tianyu Lan Link: https://lore.kernel.org/r/1679838727-87310-9-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- drivers/hv/connection.c | 113 ++++++++++++---------------------------------- drivers/hv/hyperv_vmbus.h | 2 - 2 files changed, 28 insertions(+), 87 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index da51b50787df..5978e9dbc286 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -104,8 +104,14 @@ int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version) vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID; } - msg->monitor_page1 = vmbus_connection.monitor_pages_pa[0]; - msg->monitor_page2 = vmbus_connection.monitor_pages_pa[1]; + /* + * shared_gpa_boundary is zero in non-SNP VMs, so it's safe to always + * bitwise OR it + */ + msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]) | + ms_hyperv.shared_gpa_boundary; + msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]) | + ms_hyperv.shared_gpa_boundary; msg->target_vcpu = hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU); @@ -219,72 +225,27 @@ int vmbus_connect(void) * Setup the monitor notification facility. The 1st page for * parent->child and the 2nd page for child->parent */ - vmbus_connection.monitor_pages[0] = (void *)hv_alloc_hyperv_zeroed_page(); - vmbus_connection.monitor_pages[1] = (void *)hv_alloc_hyperv_zeroed_page(); + vmbus_connection.monitor_pages[0] = (void *)hv_alloc_hyperv_page(); + vmbus_connection.monitor_pages[1] = (void *)hv_alloc_hyperv_page(); if ((vmbus_connection.monitor_pages[0] == NULL) || (vmbus_connection.monitor_pages[1] == NULL)) { ret = -ENOMEM; goto cleanup; } - vmbus_connection.monitor_pages_original[0] - = vmbus_connection.monitor_pages[0]; - vmbus_connection.monitor_pages_original[1] - = vmbus_connection.monitor_pages[1]; - vmbus_connection.monitor_pages_pa[0] - = virt_to_phys(vmbus_connection.monitor_pages[0]); - vmbus_connection.monitor_pages_pa[1] - = virt_to_phys(vmbus_connection.monitor_pages[1]); - - if (hv_is_isolation_supported()) { - ret = set_memory_decrypted((unsigned long) - vmbus_connection.monitor_pages[0], - 1); - ret |= set_memory_decrypted((unsigned long) - vmbus_connection.monitor_pages[1], - 1); - if (ret) - goto cleanup; - - /* - * Isolation VM with AMD SNP needs to access monitor page via - * address space above shared gpa boundary. - */ - if (hv_isolation_type_snp()) { - vmbus_connection.monitor_pages_pa[0] += - ms_hyperv.shared_gpa_boundary; - vmbus_connection.monitor_pages_pa[1] += - ms_hyperv.shared_gpa_boundary; - - vmbus_connection.monitor_pages[0] - = memremap(vmbus_connection.monitor_pages_pa[0], - HV_HYP_PAGE_SIZE, - MEMREMAP_WB); - if (!vmbus_connection.monitor_pages[0]) { - ret = -ENOMEM; - goto cleanup; - } - - vmbus_connection.monitor_pages[1] - = memremap(vmbus_connection.monitor_pages_pa[1], - HV_HYP_PAGE_SIZE, - MEMREMAP_WB); - if (!vmbus_connection.monitor_pages[1]) { - ret = -ENOMEM; - goto cleanup; - } - } - - /* - * Set memory host visibility hvcall smears memory - * and so zero monitor pages here. - */ - memset(vmbus_connection.monitor_pages[0], 0x00, - HV_HYP_PAGE_SIZE); - memset(vmbus_connection.monitor_pages[1], 0x00, - HV_HYP_PAGE_SIZE); + ret = set_memory_decrypted((unsigned long) + vmbus_connection.monitor_pages[0], 1); + ret |= set_memory_decrypted((unsigned long) + vmbus_connection.monitor_pages[1], 1); + if (ret) + goto cleanup; - } + /* + * Set_memory_decrypted() will change the memory contents if + * decryption occurs, so zero monitor pages here. + */ + memset(vmbus_connection.monitor_pages[0], 0x00, HV_HYP_PAGE_SIZE); + memset(vmbus_connection.monitor_pages[1], 0x00, HV_HYP_PAGE_SIZE); msginfo = kzalloc(sizeof(*msginfo) + sizeof(struct vmbus_channel_initiate_contact), @@ -376,31 +337,13 @@ void vmbus_disconnect(void) vmbus_connection.int_page = NULL; } - if (hv_is_isolation_supported()) { - /* - * memunmap() checks input address is ioremap address or not - * inside. It doesn't unmap any thing in the non-SNP CVM and - * so not check CVM type here. - */ - memunmap(vmbus_connection.monitor_pages[0]); - memunmap(vmbus_connection.monitor_pages[1]); - - set_memory_encrypted((unsigned long) - vmbus_connection.monitor_pages_original[0], - 1); - set_memory_encrypted((unsigned long) - vmbus_connection.monitor_pages_original[1], - 1); - } + set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[0], 1); + set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[1], 1); - hv_free_hyperv_page((unsigned long) - vmbus_connection.monitor_pages_original[0]); - hv_free_hyperv_page((unsigned long) - vmbus_connection.monitor_pages_original[1]); - vmbus_connection.monitor_pages_original[0] = - vmbus_connection.monitor_pages[0] = NULL; - vmbus_connection.monitor_pages_original[1] = - vmbus_connection.monitor_pages[1] = NULL; + hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[0]); + hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[1]); + vmbus_connection.monitor_pages[0] = NULL; + vmbus_connection.monitor_pages[1] = NULL; } /* diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index dc673edf053c..167ac5115a78 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -241,8 +241,6 @@ struct vmbus_connection { * is child->parent notification */ struct hv_monitor_page *monitor_pages[2]; - void *monitor_pages_original[2]; - phys_addr_t monitor_pages_pa[2]; struct list_head chn_msg_list; spinlock_t channelmsg_lock; -- cgit From bb862397f48fc79a1ea31b83a0bd8f1f913b4ab6 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 26 Mar 2023 06:52:04 -0700 Subject: Drivers: hv: vmbus: Remove second way of mapping ring buffers With changes to how Hyper-V guest VMs flip memory between private (encrypted) and shared (decrypted), it's no longer necessary to have separate code paths for mapping VMBus ring buffers for for normal VMs and for Confidential VMs. As such, remove the code path that uses vmap_pfn(), and set the protection flags argument to vmap() to account for the difference between normal and Confidential VMs. Signed-off-by: Michael Kelley Reviewed-by: Tianyu Lan Link: https://lore.kernel.org/r/1679838727-87310-10-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- drivers/hv/ring_buffer.c | 62 ++++++++++++++++-------------------------------- 1 file changed, 20 insertions(+), 42 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 2111e97c3b63..3c9b02471760 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -186,8 +186,6 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, struct page *pages, u32 page_cnt, u32 max_pkt_size) { struct page **pages_wraparound; - unsigned long *pfns_wraparound; - u64 pfn; int i; BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE)); @@ -196,50 +194,30 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, * First page holds struct hv_ring_buffer, do wraparound mapping for * the rest. */ - if (hv_isolation_type_snp()) { - pfn = page_to_pfn(pages) + - PFN_DOWN(ms_hyperv.shared_gpa_boundary); + pages_wraparound = kcalloc(page_cnt * 2 - 1, + sizeof(struct page *), + GFP_KERNEL); + if (!pages_wraparound) + return -ENOMEM; - pfns_wraparound = kcalloc(page_cnt * 2 - 1, - sizeof(unsigned long), GFP_KERNEL); - if (!pfns_wraparound) - return -ENOMEM; - - pfns_wraparound[0] = pfn; - for (i = 0; i < 2 * (page_cnt - 1); i++) - pfns_wraparound[i + 1] = pfn + i % (page_cnt - 1) + 1; - - ring_info->ring_buffer = (struct hv_ring_buffer *) - vmap_pfn(pfns_wraparound, page_cnt * 2 - 1, - pgprot_decrypted(PAGE_KERNEL)); - kfree(pfns_wraparound); - - if (!ring_info->ring_buffer) - return -ENOMEM; - - /* Zero ring buffer after setting memory host visibility. */ - memset(ring_info->ring_buffer, 0x00, PAGE_SIZE * page_cnt); - } else { - pages_wraparound = kcalloc(page_cnt * 2 - 1, - sizeof(struct page *), - GFP_KERNEL); - if (!pages_wraparound) - return -ENOMEM; - - pages_wraparound[0] = pages; - for (i = 0; i < 2 * (page_cnt - 1); i++) - pages_wraparound[i + 1] = - &pages[i % (page_cnt - 1) + 1]; + pages_wraparound[0] = pages; + for (i = 0; i < 2 * (page_cnt - 1); i++) + pages_wraparound[i + 1] = + &pages[i % (page_cnt - 1) + 1]; - ring_info->ring_buffer = (struct hv_ring_buffer *) - vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, - PAGE_KERNEL); + ring_info->ring_buffer = (struct hv_ring_buffer *) + vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, + pgprot_decrypted(PAGE_KERNEL)); - kfree(pages_wraparound); - if (!ring_info->ring_buffer) - return -ENOMEM; - } + kfree(pages_wraparound); + if (!ring_info->ring_buffer) + return -ENOMEM; + /* + * Ensure the header page is zero'ed since + * encryption status may have changed. + */ + memset(ring_info->ring_buffer, 0, HV_HYP_PAGE_SIZE); ring_info->ring_buffer->read_index = ring_info->ring_buffer->write_index = 0; -- cgit From 25727aaed6514b88f98a18862c6f2d65a0b0ec3b Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 26 Mar 2023 06:52:05 -0700 Subject: hv_netvsc: Remove second mapping of send and recv buffers With changes to how Hyper-V guest VMs flip memory between private (encrypted) and shared (decrypted), creating a second kernel virtual mapping for shared memory is no longer necessary. Everything needed for the transition to shared is handled by set_memory_decrypted(). As such, remove the code to create and manage the second mapping for the pre-allocated send and recv buffers. This mapping is the last user of hv_map_memory()/hv_unmap_memory(), so delete these functions as well. Finally, hv_map_memory() is the last user of vmap_pfn() in Hyper-V guest code, so remove the Kconfig selection of VMAP_PFN. Signed-off-by: Michael Kelley Reviewed-by: Tianyu Lan Link: https://lore.kernel.org/r/1679838727-87310-11-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- drivers/hv/Kconfig | 1 - drivers/hv/hv_common.c | 11 ----------- 2 files changed, 12 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 47132b30b7ee..94982f08b661 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -8,7 +8,6 @@ config HYPERV || (ACPI && ARM64 && !CPU_BIG_ENDIAN) select PARAVIRT select X86_HV_CALLBACK_VECTOR if X86 - select VMAP_PFN select OF_EARLY_FLATTREE if OF help Select this option to run Linux as a Hyper-V client operating diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 52a6f89ccdbd..6d40b6c7b23b 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -311,14 +311,3 @@ u64 __weak hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_s return HV_STATUS_INVALID_PARAMETER; } EXPORT_SYMBOL_GPL(hv_ghcb_hypercall); - -void __weak *hv_map_memory(void *addr, unsigned long size) -{ - return NULL; -} -EXPORT_SYMBOL_GPL(hv_map_memory); - -void __weak hv_unmap_memory(void *addr) -{ -} -EXPORT_SYMBOL_GPL(hv_unmap_memory); -- cgit From 6afd9dc1a4b158456c072580f0851b4dbaaa02f1 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 26 Mar 2023 06:52:06 -0700 Subject: Drivers: hv: Don't remap addresses that are above shared_gpa_boundary With the vTOM bit now treated as a protection flag and not part of the physical address, avoid remapping physical addresses with vTOM set since technically such addresses aren't valid. Use ioremap_cache() instead of memremap() to ensure that the mapping provides decrypted access, which will correctly set the vTOM bit as a protection flag. While this change is not required for correctness with the current implementation of memremap(), for general code hygiene it's better to not depend on the mapping functions doing something reasonable with a physical address that is out-of-range. While here, fix typos in two error messages. Signed-off-by: Michael Kelley Reviewed-by: Tianyu Lan Link: https://lore.kernel.org/r/1679838727-87310-12-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- drivers/hv/hv.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 8b0dd8e5244d..008234894d28 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -217,11 +217,13 @@ void hv_synic_enable_regs(unsigned int cpu) simp.simp_enabled = 1; if (hv_isolation_type_snp() || hv_root_partition) { + /* Mask out vTOM bit. ioremap_cache() maps decrypted */ + u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) & + ~ms_hyperv.shared_gpa_boundary; hv_cpu->synic_message_page - = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT, - HV_HYP_PAGE_SIZE, MEMREMAP_WB); + = (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE); if (!hv_cpu->synic_message_page) - pr_err("Fail to map syinc message page.\n"); + pr_err("Fail to map synic message page.\n"); } else { simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page) >> HV_HYP_PAGE_SHIFT; @@ -234,12 +236,13 @@ void hv_synic_enable_regs(unsigned int cpu) siefp.siefp_enabled = 1; if (hv_isolation_type_snp() || hv_root_partition) { - hv_cpu->synic_event_page = - memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT, - HV_HYP_PAGE_SIZE, MEMREMAP_WB); - + /* Mask out vTOM bit. ioremap_cache() maps decrypted */ + u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) & + ~ms_hyperv.shared_gpa_boundary; + hv_cpu->synic_event_page + = (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE); if (!hv_cpu->synic_event_page) - pr_err("Fail to map syinc event page.\n"); + pr_err("Fail to map synic event page.\n"); } else { siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page) >> HV_HYP_PAGE_SHIFT; @@ -316,7 +319,7 @@ void hv_synic_disable_regs(unsigned int cpu) */ simp.simp_enabled = 0; if (hv_isolation_type_snp() || hv_root_partition) { - memunmap(hv_cpu->synic_message_page); + iounmap(hv_cpu->synic_message_page); hv_cpu->synic_message_page = NULL; } else { simp.base_simp_gpa = 0; @@ -328,7 +331,7 @@ void hv_synic_disable_regs(unsigned int cpu) siefp.siefp_enabled = 0; if (hv_isolation_type_snp() || hv_root_partition) { - memunmap(hv_cpu->synic_event_page); + iounmap(hv_cpu->synic_event_page); hv_cpu->synic_event_page = NULL; } else { siefp.base_siefp_gpa = 0; -- cgit From 2c6ba4216844ca7918289b49ed5f3f7138ee2402 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 26 Mar 2023 06:52:07 -0700 Subject: PCI: hv: Enable PCI pass-thru devices in Confidential VMs For PCI pass-thru devices in a Confidential VM, Hyper-V requires that PCI config space be accessed via hypercalls. In normal VMs, config space accesses are trapped to the Hyper-V host and emulated. But in a confidential VM, the host can't access guest memory to decode the instruction for emulation, so an explicit hypercall must be used. Add functions to make the new MMIO read and MMIO write hypercalls. Update the PCI config space access functions to use the hypercalls when such use is indicated by Hyper-V flags. Also, set the flag to allow the Hyper-V PCI driver to be loaded and used in a Confidential VM (a.k.a., "Isolation VM"). The driver has previously been hardened against a malicious Hyper-V host[1]. [1] https://lore.kernel.org/all/20220511223207.3386-2-parri.andrea@gmail.com/ Co-developed-by: Dexuan Cui Signed-off-by: Dexuan Cui Signed-off-by: Michael Kelley Reviewed-by: Boqun Feng Reviewed-by: Haiyang Zhang Link: https://lore.kernel.org/r/1679838727-87310-13-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- drivers/hv/channel_mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/hv') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index cc23b90cae02..007f26d5f1a4 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -67,7 +67,7 @@ const struct vmbus_device vmbus_devs[] = { { .dev_type = HV_PCIE, HV_PCIE_GUID, .perf_device = false, - .allowed_in_isolated = false, + .allowed_in_isolated = true, }, /* Synthetic Frame Buffer */ -- cgit From 9a6b1a170ca8627d401fa76112e4920ba8c6314c Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sat, 8 Apr 2023 14:34:41 -0700 Subject: Drivers: hv: vmbus: Remove the per-CPU post_msg_page The post_msg_page was introduced in 2014 in commit b29ef3546aec ("Drivers: hv: vmbus: Cleanup hv_post_message()") Commit 68bb7bfb7985 ("X86/Hyper-V: Enable IPI enlightenments") introduced the hyperv_pcpu_input_arg in 2018, which can be used in hv_post_message(). Remove post_msg_page to simplify the code a little bit. Signed-off-by: Dexuan Cui Reviewed-by: Jinank Jain Link: https://lore.kernel.org/r/20230408213441.15472-1-decui@microsoft.com Signed-off-by: Wei Liu --- drivers/hv/hv.c | 20 +++++--------------- drivers/hv/hyperv_vmbus.h | 4 ---- 2 files changed, 5 insertions(+), 19 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 008234894d28..4e1407d59ba0 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -84,14 +84,15 @@ int hv_post_message(union hv_connection_id connection_id, void *payload, size_t payload_size) { struct hv_input_post_message *aligned_msg; - struct hv_per_cpu_context *hv_cpu; + unsigned long flags; u64 status; if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) return -EMSGSIZE; - hv_cpu = get_cpu_ptr(hv_context.cpu_context); - aligned_msg = hv_cpu->post_msg_page; + local_irq_save(flags); + + aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg); aligned_msg->connectionid = connection_id; aligned_msg->reserved = 0; aligned_msg->message_type = message_type; @@ -106,11 +107,7 @@ int hv_post_message(union hv_connection_id connection_id, status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL); - /* Preemption must remain disabled until after the hypercall - * so some other thread can't get scheduled onto this cpu and - * corrupt the per-cpu post_msg_page - */ - put_cpu_ptr(hv_cpu); + local_irq_restore(flags); return hv_result(status); } @@ -162,12 +159,6 @@ int hv_synic_alloc(void) goto err; } } - - hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC); - if (hv_cpu->post_msg_page == NULL) { - pr_err("Unable to allocate post msg page\n"); - goto err; - } } return 0; @@ -190,7 +181,6 @@ void hv_synic_free(void) free_page((unsigned long)hv_cpu->synic_event_page); free_page((unsigned long)hv_cpu->synic_message_page); - free_page((unsigned long)hv_cpu->post_msg_page); } kfree(hv_context.hv_numa_map); diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 167ac5115a78..55f2086841ae 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -122,10 +122,6 @@ enum { struct hv_per_cpu_context { void *synic_message_page; void *synic_event_page; - /* - * buffer to post messages to the host. - */ - void *post_msg_page; /* * Starting with win8, we can take channel interrupts on any CPU; -- cgit From d01b9a9f2d0131e1e249177a70e6b80d146d16d2 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Mon, 10 Apr 2023 22:55:31 -0700 Subject: Drivers: hv: Kconfig: Add HYPERV_VTL_MODE Add HYPERV_VTL_MODE Kconfig flag for VTL mode. Signed-off-by: Saurabh Sengar Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/1681192532-15460-5-git-send-email-ssengar@linux.microsoft.com Signed-off-by: Wei Liu --- drivers/hv/Kconfig | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'drivers/hv') diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 94982f08b661..00242107d62e 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -13,6 +13,30 @@ config HYPERV Select this option to run Linux as a Hyper-V client operating system. +config HYPERV_VTL_MODE + bool "Enable Linux to boot in VTL context" + depends on X86_64 && HYPERV + default n + help + Virtual Secure Mode (VSM) is a set of hypervisor capabilities and + enlightenments offered to host and guest partitions which enables + the creation and management of new security boundaries within + operating system software. + + VSM achieves and maintains isolation through Virtual Trust Levels + (VTLs). Virtual Trust Levels are hierarchical, with higher levels + being more privileged than lower levels. VTL0 is the least privileged + level, and currently only other level supported is VTL2. + + Select this option to build a Linux kernel to run at a VTL other than + the normal VTL0, which currently is only VTL2. This option + initializes the x86 platform for VTL2, and adds the ability to boot + secondary CPUs directly into 64-bit context as required for VTLs other + than 0. A kernel built with this option must run at VTL2, and will + not run as a normal guest. + + If unsure, say N + config HYPERV_TIMER def_bool HYPERV && X86 -- cgit From 9c318a1d9b5000c77527011f158a75c5483510f5 Mon Sep 17 00:00:00 2001 From: Long Li Date: Thu, 20 Apr 2023 15:49:06 -0700 Subject: Drivers: hv: move panic report code from vmbus to hv early init code The panic reporting code was added in commit 81b18bce48af ("Drivers: HV: Send one page worth of kmsg dump over Hyper-V during panic") It was added to the vmbus driver. The panic reporting has no dependence on vmbus, and can be enabled at an earlier boot time when Hyper-V is initialized. This patch moves the panic reporting code out of vmbus. There is no functionality changes. During moving, also refactored some cleanup functions into hv_kmsg_dump_unregister(). Signed-off-by: Long Li Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/1682030946-6372-1-git-send-email-longli@linuxonhyperv.com Signed-off-by: Wei Liu --- drivers/hv/hv.c | 36 -------- drivers/hv/hv_common.c | 231 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/hv/vmbus_drv.c | 199 ------------------------------------------ 3 files changed, 231 insertions(+), 235 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 4e1407d59ba0..de6708dbe0df 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -38,42 +38,6 @@ int hv_init(void) return 0; } -/* - * Functions for allocating and freeing memory with size and - * alignment HV_HYP_PAGE_SIZE. These functions are needed because - * the guest page size may not be the same as the Hyper-V page - * size. We depend upon kmalloc() aligning power-of-two size - * allocations to the allocation size boundary, so that the - * allocated memory appears to Hyper-V as a page of the size - * it expects. - */ - -void *hv_alloc_hyperv_page(void) -{ - BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE); - - if (PAGE_SIZE == HV_HYP_PAGE_SIZE) - return (void *)__get_free_page(GFP_KERNEL); - else - return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); -} - -void *hv_alloc_hyperv_zeroed_page(void) -{ - if (PAGE_SIZE == HV_HYP_PAGE_SIZE) - return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - else - return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); -} - -void hv_free_hyperv_page(unsigned long addr) -{ - if (PAGE_SIZE == HV_HYP_PAGE_SIZE) - free_page(addr); - else - kfree((void *)addr); -} - /* * hv_post_message - Post a message using the hypervisor message IPC. * diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 6d40b6c7b23b..64f9ceca887b 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -17,8 +17,11 @@ #include #include #include +#include #include #include +#include +#include #include #include #include @@ -54,6 +57,10 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); void * __percpu *hyperv_pcpu_output_arg; EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg); +static void hv_kmsg_dump_unregister(void); + +static struct ctl_table_header *hv_ctl_table_hdr; + /* * Hyper-V specific initialization and shutdown code that is * common across all architectures. Called from architecture @@ -62,6 +69,12 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg); void __init hv_common_free(void) { + unregister_sysctl_table(hv_ctl_table_hdr); + hv_ctl_table_hdr = NULL; + + if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) + hv_kmsg_dump_unregister(); + kfree(hv_vp_index); hv_vp_index = NULL; @@ -72,10 +85,203 @@ void __init hv_common_free(void) hyperv_pcpu_input_arg = NULL; } +/* + * Functions for allocating and freeing memory with size and + * alignment HV_HYP_PAGE_SIZE. These functions are needed because + * the guest page size may not be the same as the Hyper-V page + * size. We depend upon kmalloc() aligning power-of-two size + * allocations to the allocation size boundary, so that the + * allocated memory appears to Hyper-V as a page of the size + * it expects. + */ + +void *hv_alloc_hyperv_page(void) +{ + BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE); + + if (PAGE_SIZE == HV_HYP_PAGE_SIZE) + return (void *)__get_free_page(GFP_KERNEL); + else + return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page); + +void *hv_alloc_hyperv_zeroed_page(void) +{ + if (PAGE_SIZE == HV_HYP_PAGE_SIZE) + return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + else + return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page); + +void hv_free_hyperv_page(unsigned long addr) +{ + if (PAGE_SIZE == HV_HYP_PAGE_SIZE) + free_page(addr); + else + kfree((void *)addr); +} +EXPORT_SYMBOL_GPL(hv_free_hyperv_page); + +static void *hv_panic_page; + +/* + * Boolean to control whether to report panic messages over Hyper-V. + * + * It can be set via /proc/sys/kernel/hyperv_record_panic_msg + */ +static int sysctl_record_panic_msg = 1; + +/* + * sysctl option to allow the user to control whether kmsg data should be + * reported to Hyper-V on panic. + */ +static struct ctl_table hv_ctl_table[] = { + { + .procname = "hyperv_record_panic_msg", + .data = &sysctl_record_panic_msg, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE + }, + {} +}; + +static int hv_die_panic_notify_crash(struct notifier_block *self, + unsigned long val, void *args); + +static struct notifier_block hyperv_die_report_block = { + .notifier_call = hv_die_panic_notify_crash, +}; + +static struct notifier_block hyperv_panic_report_block = { + .notifier_call = hv_die_panic_notify_crash, +}; + +/* + * The following callback works both as die and panic notifier; its + * goal is to provide panic information to the hypervisor unless the + * kmsg dumper is used [see hv_kmsg_dump()], which provides more + * information but isn't always available. + * + * Notice that both the panic/die report notifiers are registered only + * if we have the capability HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE set. + */ +static int hv_die_panic_notify_crash(struct notifier_block *self, + unsigned long val, void *args) +{ + struct pt_regs *regs; + bool is_die; + + /* Don't notify Hyper-V unless we have a die oops event or panic. */ + if (self == &hyperv_panic_report_block) { + is_die = false; + regs = current_pt_regs(); + } else { /* die event */ + if (val != DIE_OOPS) + return NOTIFY_DONE; + + is_die = true; + regs = ((struct die_args *)args)->regs; + } + + /* + * Hyper-V should be notified only once about a panic/die. If we will + * be calling hv_kmsg_dump() later with kmsg data, don't do the + * notification here. + */ + if (!sysctl_record_panic_msg || !hv_panic_page) + hyperv_report_panic(regs, val, is_die); + + return NOTIFY_DONE; +} + +/* + * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg + * buffer and call into Hyper-V to transfer the data. + */ +static void hv_kmsg_dump(struct kmsg_dumper *dumper, + enum kmsg_dump_reason reason) +{ + struct kmsg_dump_iter iter; + size_t bytes_written; + + /* We are only interested in panics. */ + if (reason != KMSG_DUMP_PANIC || !sysctl_record_panic_msg) + return; + + /* + * Write dump contents to the page. No need to synchronize; panic should + * be single-threaded. + */ + kmsg_dump_rewind(&iter); + kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE, + &bytes_written); + if (!bytes_written) + return; + /* + * P3 to contain the physical address of the panic page & P4 to + * contain the size of the panic data in that page. Rest of the + * registers are no-op when the NOTIFY_MSG flag is set. + */ + hv_set_register(HV_REGISTER_CRASH_P0, 0); + hv_set_register(HV_REGISTER_CRASH_P1, 0); + hv_set_register(HV_REGISTER_CRASH_P2, 0); + hv_set_register(HV_REGISTER_CRASH_P3, virt_to_phys(hv_panic_page)); + hv_set_register(HV_REGISTER_CRASH_P4, bytes_written); + + /* + * Let Hyper-V know there is crash data available along with + * the panic message. + */ + hv_set_register(HV_REGISTER_CRASH_CTL, + (HV_CRASH_CTL_CRASH_NOTIFY | + HV_CRASH_CTL_CRASH_NOTIFY_MSG)); +} + +static struct kmsg_dumper hv_kmsg_dumper = { + .dump = hv_kmsg_dump, +}; + +static void hv_kmsg_dump_unregister(void) +{ + kmsg_dump_unregister(&hv_kmsg_dumper); + unregister_die_notifier(&hyperv_die_report_block); + atomic_notifier_chain_unregister(&panic_notifier_list, + &hyperv_panic_report_block); + + hv_free_hyperv_page((unsigned long)hv_panic_page); + hv_panic_page = NULL; +} + +static void hv_kmsg_dump_register(void) +{ + int ret; + + hv_panic_page = hv_alloc_hyperv_zeroed_page(); + if (!hv_panic_page) { + pr_err("Hyper-V: panic message page memory allocation failed\n"); + return; + } + + ret = kmsg_dump_register(&hv_kmsg_dumper); + if (ret) { + pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret); + hv_free_hyperv_page((unsigned long)hv_panic_page); + hv_panic_page = NULL; + } +} + int __init hv_common_init(void) { int i; + if (hv_is_isolation_supported()) + sysctl_record_panic_msg = 0; + /* * Hyper-V expects to get crash register data or kmsg when * crash enlightment is available and system crashes. Set @@ -84,8 +290,33 @@ int __init hv_common_init(void) * kernel. */ if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + u64 hyperv_crash_ctl; + crash_kexec_post_notifiers = true; pr_info("Hyper-V: enabling crash_kexec_post_notifiers\n"); + + /* + * Panic message recording (sysctl_record_panic_msg) + * is enabled by default in non-isolated guests and + * disabled by default in isolated guests; the panic + * message recording won't be available in isolated + * guests should the following registration fail. + */ + hv_ctl_table_hdr = register_sysctl("kernel", hv_ctl_table); + if (!hv_ctl_table_hdr) + pr_err("Hyper-V: sysctl table register error"); + + /* + * Register for panic kmsg callback only if the right + * capability is supported by the hypervisor. + */ + hyperv_crash_ctl = hv_get_register(HV_REGISTER_CRASH_CTL); + if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) + hv_kmsg_dump_register(); + + register_die_notifier(&hyperv_die_report_block); + atomic_notifier_chain_register(&panic_notifier_list, + &hyperv_panic_report_block); } /* diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 19a0069dc4a9..309c78ce1d30 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -50,26 +49,12 @@ static struct device *hv_dev; static int hyperv_cpuhp_online; -static void *hv_panic_page; - static long __percpu *vmbus_evt; /* Values parsed from ACPI DSDT */ int vmbus_irq; int vmbus_interrupt; -/* - * Boolean to control whether to report panic messages over Hyper-V. - * - * It can be set via /proc/sys/kernel/hyperv_record_panic_msg - */ -static int sysctl_record_panic_msg = 1; - -static int hyperv_report_reg(void) -{ - return !sysctl_record_panic_msg || !hv_panic_page; -} - /* * The panic notifier below is responsible solely for unloading the * vmbus connection, which is necessary in a panic event. @@ -90,54 +75,6 @@ static struct notifier_block hyperv_panic_vmbus_unload_block = { .priority = INT_MIN + 1, /* almost the latest one to execute */ }; -static int hv_die_panic_notify_crash(struct notifier_block *self, - unsigned long val, void *args); - -static struct notifier_block hyperv_die_report_block = { - .notifier_call = hv_die_panic_notify_crash, -}; -static struct notifier_block hyperv_panic_report_block = { - .notifier_call = hv_die_panic_notify_crash, -}; - -/* - * The following callback works both as die and panic notifier; its - * goal is to provide panic information to the hypervisor unless the - * kmsg dumper is used [see hv_kmsg_dump()], which provides more - * information but isn't always available. - * - * Notice that both the panic/die report notifiers are registered only - * if we have the capability HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE set. - */ -static int hv_die_panic_notify_crash(struct notifier_block *self, - unsigned long val, void *args) -{ - struct pt_regs *regs; - bool is_die; - - /* Don't notify Hyper-V unless we have a die oops event or panic. */ - if (self == &hyperv_panic_report_block) { - is_die = false; - regs = current_pt_regs(); - } else { /* die event */ - if (val != DIE_OOPS) - return NOTIFY_DONE; - - is_die = true; - regs = ((struct die_args *)args)->regs; - } - - /* - * Hyper-V should be notified only once about a panic/die. If we will - * be calling hv_kmsg_dump() later with kmsg data, don't do the - * notification here. - */ - if (hyperv_report_reg()) - hyperv_report_panic(regs, val, is_die); - - return NOTIFY_DONE; -} - static const char *fb_mmio_name = "fb_range"; static struct resource *fb_mmio; static struct resource *hyperv_mmio; @@ -1379,98 +1316,6 @@ static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id) return IRQ_HANDLED; } -/* - * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg - * buffer and call into Hyper-V to transfer the data. - */ -static void hv_kmsg_dump(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason) -{ - struct kmsg_dump_iter iter; - size_t bytes_written; - - /* We are only interested in panics. */ - if ((reason != KMSG_DUMP_PANIC) || (!sysctl_record_panic_msg)) - return; - - /* - * Write dump contents to the page. No need to synchronize; panic should - * be single-threaded. - */ - kmsg_dump_rewind(&iter); - kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE, - &bytes_written); - if (!bytes_written) - return; - /* - * P3 to contain the physical address of the panic page & P4 to - * contain the size of the panic data in that page. Rest of the - * registers are no-op when the NOTIFY_MSG flag is set. - */ - hv_set_register(HV_REGISTER_CRASH_P0, 0); - hv_set_register(HV_REGISTER_CRASH_P1, 0); - hv_set_register(HV_REGISTER_CRASH_P2, 0); - hv_set_register(HV_REGISTER_CRASH_P3, virt_to_phys(hv_panic_page)); - hv_set_register(HV_REGISTER_CRASH_P4, bytes_written); - - /* - * Let Hyper-V know there is crash data available along with - * the panic message. - */ - hv_set_register(HV_REGISTER_CRASH_CTL, - (HV_CRASH_CTL_CRASH_NOTIFY | HV_CRASH_CTL_CRASH_NOTIFY_MSG)); -} - -static struct kmsg_dumper hv_kmsg_dumper = { - .dump = hv_kmsg_dump, -}; - -static void hv_kmsg_dump_register(void) -{ - int ret; - - hv_panic_page = hv_alloc_hyperv_zeroed_page(); - if (!hv_panic_page) { - pr_err("Hyper-V: panic message page memory allocation failed\n"); - return; - } - - ret = kmsg_dump_register(&hv_kmsg_dumper); - if (ret) { - pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret); - hv_free_hyperv_page((unsigned long)hv_panic_page); - hv_panic_page = NULL; - } -} - -static struct ctl_table_header *hv_ctl_table_hdr; - -/* - * sysctl option to allow the user to control whether kmsg data should be - * reported to Hyper-V on panic. - */ -static struct ctl_table hv_ctl_table[] = { - { - .procname = "hyperv_record_panic_msg", - .data = &sysctl_record_panic_msg, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE - }, - {} -}; - -static struct ctl_table hv_root_table[] = { - { - .procname = "kernel", - .mode = 0555, - .child = hv_ctl_table - }, - {} -}; - /* * vmbus_bus_init -Main vmbus driver initialization routine. * @@ -1534,38 +1379,6 @@ static int vmbus_bus_init(void) if (ret) goto err_connect; - if (hv_is_isolation_supported()) - sysctl_record_panic_msg = 0; - - /* - * Only register if the crash MSRs are available - */ - if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { - u64 hyperv_crash_ctl; - /* - * Panic message recording (sysctl_record_panic_msg) - * is enabled by default in non-isolated guests and - * disabled by default in isolated guests; the panic - * message recording won't be available in isolated - * guests should the following registration fail. - */ - hv_ctl_table_hdr = register_sysctl_table(hv_root_table); - if (!hv_ctl_table_hdr) - pr_err("Hyper-V: sysctl table register error"); - - /* - * Register for panic kmsg callback only if the right - * capability is supported by the hypervisor. - */ - hyperv_crash_ctl = hv_get_register(HV_REGISTER_CRASH_CTL); - if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) - hv_kmsg_dump_register(); - - register_die_notifier(&hyperv_die_report_block); - atomic_notifier_chain_register(&panic_notifier_list, - &hyperv_panic_report_block); - } - /* * Always register the vmbus unload panic notifier because we * need to shut the VMbus channel connection on panic. @@ -1590,8 +1403,6 @@ err_alloc: } err_setup: bus_unregister(&hv_bus); - unregister_sysctl_table(hv_ctl_table_hdr); - hv_ctl_table_hdr = NULL; return ret; } @@ -2887,13 +2698,6 @@ static void __exit vmbus_exit(void) vmbus_free_channels(); kfree(vmbus_connection.channels); - if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { - kmsg_dump_unregister(&hv_kmsg_dumper); - unregister_die_notifier(&hyperv_die_report_block); - atomic_notifier_chain_unregister(&panic_notifier_list, - &hyperv_panic_report_block); - } - /* * The vmbus panic notifier is always registered, hence we should * also unconditionally unregister it here as well. @@ -2901,9 +2705,6 @@ static void __exit vmbus_exit(void) atomic_notifier_chain_unregister(&panic_notifier_list, &hyperv_panic_vmbus_unload_block); - free_page((unsigned long)hv_panic_page); - unregister_sysctl_table(hv_ctl_table_hdr); - hv_ctl_table_hdr = NULL; bus_unregister(&hv_bus); cpuhp_remove_state(hyperv_cpuhp_online); -- cgit