diff options
Diffstat (limited to 'drivers/hv')
-rw-r--r-- | drivers/hv/Kconfig | 7 | ||||
-rw-r--r-- | drivers/hv/channel.c | 1 | ||||
-rw-r--r-- | drivers/hv/channel_mgmt.c | 1 | ||||
-rw-r--r-- | drivers/hv/connection.c | 28 | ||||
-rw-r--r-- | drivers/hv/hv.c | 6 | ||||
-rw-r--r-- | drivers/hv/hv_common.c | 76 | ||||
-rw-r--r-- | drivers/hv/hv_proc.c | 1 | ||||
-rw-r--r-- | drivers/hv/mshv_common.c | 1 | ||||
-rw-r--r-- | drivers/hv/mshv_root_hv_call.c | 1 | ||||
-rw-r--r-- | drivers/hv/ring_buffer.c | 1 | ||||
-rw-r--r-- | drivers/hv/vmbus_drv.c | 98 |
11 files changed, 156 insertions, 65 deletions
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 6c1416167bd2..57623ca7f350 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -5,17 +5,18 @@ menu "Microsoft Hyper-V guest support" config HYPERV tristate "Microsoft Hyper-V client drivers" depends on (X86 && X86_LOCAL_APIC && HYPERVISOR_GUEST) \ - || (ACPI && ARM64 && !CPU_BIG_ENDIAN) + || (ARM64 && !CPU_BIG_ENDIAN) select PARAVIRT select X86_HV_CALLBACK_VECTOR if X86 select OF_EARLY_FLATTREE if OF + select SYSFB if EFI && !HYPERV_VTL_MODE help Select this option to run Linux as a Hyper-V client operating system. config HYPERV_VTL_MODE bool "Enable Linux to boot in VTL context" - depends on X86_64 && HYPERV + depends on (X86_64 || ARM64) && HYPERV depends on SMP default n help @@ -31,7 +32,7 @@ config HYPERV_VTL_MODE Select this option to build a Linux kernel to run at a VTL other than the normal VTL0, which currently is only VTL2. This option - initializes the x86 platform for VTL2, and adds the ability to boot + initializes the kernel to run in VTL2, and adds the ability to boot secondary CPUs directly into 64-bit context as required for VTLs other than 0. A kernel built with this option must run at VTL2, and will not run as a normal guest. diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 35f26fa1ffe7..7c7c66e0dc3f 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -18,6 +18,7 @@ #include <linux/uio.h> #include <linux/interrupt.h> #include <linux/set_memory.h> +#include <linux/export.h> #include <asm/page.h> #include <asm/mshyperv.h> diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 6e084c207414..65dd299e2944 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -20,6 +20,7 @@ #include <linux/delay.h> #include <linux/cpu.h> #include <linux/hyperv.h> +#include <linux/export.h> #include <asm/mshyperv.h> #include <linux/sched/isolation.h> diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 8351360bba16..1fe3573ae52a 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -207,10 +207,19 @@ int vmbus_connect(void) mutex_init(&vmbus_connection.channel_mutex); /* + * The following Hyper-V interrupt and monitor pages can be used by + * UIO for mapping to user-space, so they should always be allocated on + * system page boundaries. The system page size must be >= the Hyper-V + * page size. + */ + BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE); + + /* * Setup the vmbus event connection for channel interrupt * abstraction stuff */ - vmbus_connection.int_page = hv_alloc_hyperv_zeroed_page(); + vmbus_connection.int_page = + (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); if (vmbus_connection.int_page == NULL) { ret = -ENOMEM; goto cleanup; @@ -225,8 +234,8 @@ int vmbus_connect(void) * Setup the monitor notification facility. The 1st page for * parent->child and the 2nd page for child->parent */ - vmbus_connection.monitor_pages[0] = hv_alloc_hyperv_page(); - vmbus_connection.monitor_pages[1] = hv_alloc_hyperv_page(); + vmbus_connection.monitor_pages[0] = (void *)__get_free_page(GFP_KERNEL); + vmbus_connection.monitor_pages[1] = (void *)__get_free_page(GFP_KERNEL); if ((vmbus_connection.monitor_pages[0] == NULL) || (vmbus_connection.monitor_pages[1] == NULL)) { ret = -ENOMEM; @@ -342,21 +351,23 @@ void vmbus_disconnect(void) destroy_workqueue(vmbus_connection.work_queue); if (vmbus_connection.int_page) { - hv_free_hyperv_page(vmbus_connection.int_page); + free_page((unsigned long)vmbus_connection.int_page); vmbus_connection.int_page = NULL; } if (vmbus_connection.monitor_pages[0]) { if (!set_memory_encrypted( (unsigned long)vmbus_connection.monitor_pages[0], 1)) - hv_free_hyperv_page(vmbus_connection.monitor_pages[0]); + free_page((unsigned long) + vmbus_connection.monitor_pages[0]); vmbus_connection.monitor_pages[0] = NULL; } if (vmbus_connection.monitor_pages[1]) { if (!set_memory_encrypted( (unsigned long)vmbus_connection.monitor_pages[1], 1)) - hv_free_hyperv_page(vmbus_connection.monitor_pages[1]); + free_page((unsigned long) + vmbus_connection.monitor_pages[1]); vmbus_connection.monitor_pages[1] = NULL; } } @@ -508,7 +519,10 @@ void vmbus_set_event(struct vmbus_channel *channel) else WARN_ON_ONCE(1); } else { - hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event); + u64 control = HVCALL_SIGNAL_EVENT; + + control |= hv_nested ? HV_HYPERCALL_NESTED : 0; + hv_do_fast_hypercall8(control, channel->sig_event); } } EXPORT_SYMBOL_GPL(vmbus_set_event); diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 308c8f279df8..b14c5f9e0ef2 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -85,8 +85,10 @@ int hv_post_message(union hv_connection_id connection_id, else status = HV_STATUS_INVALID_PARAMETER; } else { - status = hv_do_hypercall(HVCALL_POST_MESSAGE, - aligned_msg, NULL); + u64 control = HVCALL_POST_MESSAGE; + + control |= hv_nested ? HV_HYPERCALL_NESTED : 0; + status = hv_do_hypercall(control, aligned_msg, NULL); } local_irq_restore(flags); diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 59792e00cecf..49898d10faff 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -105,45 +105,6 @@ void __init hv_common_free(void) hv_synic_eventring_tail = NULL; } -/* - * Functions for allocating and freeing memory with size and - * alignment HV_HYP_PAGE_SIZE. These functions are needed because - * the guest page size may not be the same as the Hyper-V page - * size. We depend upon kmalloc() aligning power-of-two size - * allocations to the allocation size boundary, so that the - * allocated memory appears to Hyper-V as a page of the size - * it expects. - */ - -void *hv_alloc_hyperv_page(void) -{ - BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE); - - if (PAGE_SIZE == HV_HYP_PAGE_SIZE) - return (void *)__get_free_page(GFP_KERNEL); - else - return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); -} -EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page); - -void *hv_alloc_hyperv_zeroed_page(void) -{ - if (PAGE_SIZE == HV_HYP_PAGE_SIZE) - return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - else - return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); -} -EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page); - -void hv_free_hyperv_page(void *addr) -{ - if (PAGE_SIZE == HV_HYP_PAGE_SIZE) - free_page((unsigned long)addr); - else - kfree(addr); -} -EXPORT_SYMBOL_GPL(hv_free_hyperv_page); - static void *hv_panic_page; /* @@ -272,7 +233,7 @@ static void hv_kmsg_dump_unregister(void) atomic_notifier_chain_unregister(&panic_notifier_list, &hyperv_panic_report_block); - hv_free_hyperv_page(hv_panic_page); + kfree(hv_panic_page); hv_panic_page = NULL; } @@ -280,7 +241,7 @@ static void hv_kmsg_dump_register(void) { int ret; - hv_panic_page = hv_alloc_hyperv_zeroed_page(); + hv_panic_page = kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); if (!hv_panic_page) { pr_err("Hyper-V: panic message page memory allocation failed\n"); return; @@ -289,7 +250,7 @@ static void hv_kmsg_dump_register(void) ret = kmsg_dump_register(&hv_kmsg_dumper); if (ret) { pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret); - hv_free_hyperv_page(hv_panic_page); + kfree(hv_panic_page); hv_panic_page = NULL; } } @@ -317,6 +278,37 @@ void __init hv_get_partition_id(void) pr_err("Hyper-V: failed to get partition ID: %#x\n", hv_result(status)); } +#if IS_ENABLED(CONFIG_HYPERV_VTL_MODE) +u8 __init get_vtl(void) +{ + u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS; + struct hv_input_get_vp_registers *input; + struct hv_output_get_vp_registers *output; + unsigned long flags; + u64 ret; + + local_irq_save(flags); + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + output = *this_cpu_ptr(hyperv_pcpu_output_arg); + + memset(input, 0, struct_size(input, names, 1)); + input->partition_id = HV_PARTITION_ID_SELF; + input->vp_index = HV_VP_INDEX_SELF; + input->input_vtl.as_uint8 = 0; + input->names[0] = HV_REGISTER_VSM_VP_STATUS; + + ret = hv_do_hypercall(control, input, output); + if (hv_result_success(ret)) { + ret = output->values[0].reg8 & HV_VTL_MASK; + } else { + pr_err("Failed to get VTL(error: %lld) exiting...\n", ret); + BUG(); + } + + local_irq_restore(flags); + return ret; +} +#endif int __init hv_common_init(void) { diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c index 7d7ecb6f6137..fbb4eb3901bb 100644 --- a/drivers/hv/hv_proc.c +++ b/drivers/hv/hv_proc.c @@ -6,6 +6,7 @@ #include <linux/slab.h> #include <linux/cpuhotplug.h> #include <linux/minmax.h> +#include <linux/export.h> #include <asm/mshyperv.h> /* diff --git a/drivers/hv/mshv_common.c b/drivers/hv/mshv_common.c index 2575e6d7a71f..6f227a8a5af7 100644 --- a/drivers/hv/mshv_common.c +++ b/drivers/hv/mshv_common.c @@ -13,6 +13,7 @@ #include <linux/mm.h> #include <asm/mshyperv.h> #include <linux/resume_user_mode.h> +#include <linux/export.h> #include "mshv.h" diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c index a222a16107f6..c9c274f29c3c 100644 --- a/drivers/hv/mshv_root_hv_call.c +++ b/drivers/hv/mshv_root_hv_call.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/export.h> #include <asm/mshyperv.h> #include "mshv_root.h" diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 3c9b02471760..23ce1fb70de1 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -18,6 +18,7 @@ #include <linux/slab.h> #include <linux/prefetch.h> #include <linux/io.h> +#include <linux/export.h> #include <asm/mshyperv.h> #include "hyperv_vmbus.h" diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index e3d51a316316..2ed5a1e89d69 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -45,7 +45,8 @@ struct vmbus_dynid { struct hv_vmbus_device_id id; }; -static struct device *hv_dev; +/* VMBus Root Device */ +static struct device *vmbus_root_device; static int hyperv_cpuhp_online; @@ -80,9 +81,15 @@ static struct resource *fb_mmio; static struct resource *hyperv_mmio; static DEFINE_MUTEX(hyperv_mmio_lock); +struct device *hv_get_vmbus_root_device(void) +{ + return vmbus_root_device; +} +EXPORT_SYMBOL_GPL(hv_get_vmbus_root_device); + static int vmbus_exists(void) { - if (hv_dev == NULL) + if (vmbus_root_device == NULL) return -ENODEV; return 0; @@ -707,7 +714,30 @@ static const struct hv_vmbus_device_id *hv_vmbus_get_id(const struct hv_driver * return id; } -/* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */ +/* vmbus_add_dynid - add a new device ID to this driver and re-probe devices + * + * This function can race with vmbus_device_register(). This function is + * typically running on a user thread in response to writing to the "new_id" + * sysfs entry for a driver. vmbus_device_register() is running on a + * workqueue thread in response to the Hyper-V host offering a device to the + * guest. This function calls driver_attach(), which looks for an existing + * device matching the new id, and attaches the driver to which the new id + * has been assigned. vmbus_device_register() calls device_register(), which + * looks for a driver that matches the device being registered. If both + * operations are running simultaneously, the device driver probe function runs + * on whichever thread establishes the linkage between the driver and device. + * + * In most cases, it doesn't matter which thread runs the driver probe + * function. But if vmbus_device_register() does not find a matching driver, + * it proceeds to create the "channels" subdirectory and numbered per-channel + * subdirectory in sysfs. While that multi-step creation is in progress, this + * function could run the driver probe function. If the probe function checks + * for, or operates on, entries in the "channels" subdirectory, including by + * calling hv_create_ring_sysfs(), the operation may or may not succeed + * depending on the race. The race can't create a kernel failure in VMBus + * or device subsystem code, but probe functions in VMBus drivers doing such + * operations must be prepared for the failure case. + */ static int vmbus_add_dynid(struct hv_driver *drv, guid_t *guid) { struct vmbus_dynid *dynid; @@ -861,7 +891,7 @@ static int vmbus_dma_configure(struct device *child_device) * On x86/x64 coherence is assumed and these calls have no effect. */ hv_setup_dma_ops(child_device, - device_get_dma_attr(hv_dev) == DEV_DMA_COHERENT); + device_get_dma_attr(vmbus_root_device) == DEV_DMA_COHERENT); return 0; } @@ -1841,7 +1871,7 @@ static struct attribute *vmbus_chan_attrs[] = { NULL }; -static struct bin_attribute *vmbus_chan_bin_attrs[] = { +static const struct bin_attribute *vmbus_chan_bin_attrs[] = { &chan_attr_ring_buffer, NULL }; @@ -1921,7 +1951,8 @@ static const struct kobj_type vmbus_chan_ktype = { * ring for userspace to use. * Note: Race conditions can happen with userspace and it is not encouraged to create new * use-cases for this. This was added to maintain backward compatibility, while solving - * one of the race conditions in uio_hv_generic while creating sysfs. + * one of the race conditions in uio_hv_generic while creating sysfs. See comments with + * vmbus_add_dynid() and vmbus_device_register(). * * Returns 0 on success or error code on failure. */ @@ -2037,7 +2068,7 @@ int vmbus_device_register(struct hv_device *child_device_obj) &child_device_obj->channel->offermsg.offer.if_instance); child_device_obj->device.bus = &hv_bus; - child_device_obj->device.parent = hv_dev; + child_device_obj->device.parent = vmbus_root_device; child_device_obj->device.release = vmbus_device_release; child_device_obj->device.dma_parms = &child_device_obj->dma_parms; @@ -2055,6 +2086,20 @@ int vmbus_device_register(struct hv_device *child_device_obj) return ret; } + /* + * If device_register() found a driver to assign to the device, the + * driver's probe function has already run at this point. If that + * probe function accesses or operates on the "channels" subdirectory + * in sysfs, those operations will have failed because the "channels" + * subdirectory doesn't exist until the code below runs. Or if the + * probe function creates a /dev entry, a user space program could + * find and open the /dev entry, and then create a race by accessing + * the "channels" subdirectory while the creation steps are in progress + * here. The race can't result in a kernel failure, but the user space + * program may get an error in accessing "channels" or its + * subdirectories. See also comments with vmbus_add_dynid() about a + * related race condition. + */ child_device_obj->channels_kset = kset_create_and_add("channels", NULL, kobj); if (!child_device_obj->channels_kset) { @@ -2412,7 +2457,7 @@ static int vmbus_acpi_add(struct platform_device *pdev) struct acpi_device *ancestor; struct acpi_device *device = ACPI_COMPANION(&pdev->dev); - hv_dev = &device->dev; + vmbus_root_device = &device->dev; /* * Older versions of Hyper-V for ARM64 fail to include the _CCA @@ -2464,6 +2509,32 @@ static int vmbus_acpi_add(struct platform_device *pdev) return 0; } #endif +#ifndef HYPERVISOR_CALLBACK_VECTOR +static int vmbus_set_irq(struct platform_device *pdev) +{ + struct irq_data *data; + int irq; + irq_hw_number_t hwirq; + + irq = platform_get_irq(pdev, 0); + /* platform_get_irq() may not return 0. */ + if (irq < 0) + return irq; + + data = irq_get_irq_data(irq); + if (!data) { + pr_err("No interrupt data for VMBus virq %d\n", irq); + return -ENODEV; + } + hwirq = irqd_to_hwirq(data); + + vmbus_irq = irq; + vmbus_interrupt = hwirq; + pr_debug("VMBus virq %d, hwirq %d\n", vmbus_irq, vmbus_interrupt); + + return 0; +} +#endif static int vmbus_device_add(struct platform_device *pdev) { @@ -2473,12 +2544,17 @@ static int vmbus_device_add(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; int ret; - hv_dev = &pdev->dev; + vmbus_root_device = &pdev->dev; ret = of_range_parser_init(&parser, np); if (ret) return ret; +#ifndef HYPERVISOR_CALLBACK_VECTOR + ret = vmbus_set_irq(pdev); + if (ret) + return ret; +#endif for_each_of_range(&parser, &range) { struct resource *res; @@ -2786,7 +2862,7 @@ static int __init hv_acpi_init(void) if (ret) return ret; - if (!hv_dev) { + if (!vmbus_root_device) { ret = -ENODEV; goto cleanup; } @@ -2817,7 +2893,7 @@ static int __init hv_acpi_init(void) cleanup: platform_driver_unregister(&vmbus_platform_driver); - hv_dev = NULL; + vmbus_root_device = NULL; return ret; } |