diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-01-25 09:22:55 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-01-25 09:22:55 -0800 |
commit | 382e391365ca12d1e5a15f109ba8b4609d58db6b (patch) | |
tree | 82f10864a9e2898a811ae46b60fcf765456e37ab /drivers | |
parent | b46c89c08f4146e7987fc355941a93b12e2c03ef (diff) | |
parent | 2e03358be78b65d28b66e17aca9e0c8700b0df78 (diff) |
Merge tag 'hyperv-next-signed-20250123' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux
Pull hyperv updates from Wei Liu:
- Introduce a new set of Hyper-V headers in include/hyperv and replace
the old hyperv-tlfs.h with the new headers (Nuno Das Neves)
- Fixes for the Hyper-V VTL mode (Roman Kisel)
- Fixes for cpu mask usage in Hyper-V code (Michael Kelley)
- Document the guest VM hibernation behaviour (Michael Kelley)
- Miscellaneous fixes and cleanups (Jacob Pan, John Starks, Naman Jain)
* tag 'hyperv-next-signed-20250123' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux:
Documentation: hyperv: Add overview of guest VM hibernation
hyperv: Do not overlap the hvcall IO areas in hv_vtl_apicid_to_vp_id()
hyperv: Do not overlap the hvcall IO areas in get_vtl()
hyperv: Enable the hypercall output page for the VTL mode
hv_balloon: Fallback to generic_online_page() for non-HV hot added mem
Drivers: hv: vmbus: Log on missing offers if any
Drivers: hv: vmbus: Wait for boot-time offers during boot and resume
uio_hv_generic: Add a check for HV_NIC for send, receive buffers setup
iommu/hyper-v: Don't assume cpu_possible_mask is dense
Drivers: hv: Don't assume cpu_possible_mask is dense
x86/hyperv: Don't assume cpu_possible_mask is dense
hyperv: Remove the now unused hyperv-tlfs.h files
hyperv: Switch from hyperv-tlfs.h to hyperv/hvhdk.h
hyperv: Add new Hyper-V headers in include/hyperv
hyperv: Clean up unnecessary #includes
hyperv: Move hv_connection_id to hyperv-tlfs.h
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/clocksource/hyperv_timer.c | 2 | ||||
-rw-r--r-- | drivers/hv/channel_mgmt.c | 61 | ||||
-rw-r--r-- | drivers/hv/connection.c | 4 | ||||
-rw-r--r-- | drivers/hv/hv_balloon.c | 22 | ||||
-rw-r--r-- | drivers/hv/hv_common.c | 17 | ||||
-rw-r--r-- | drivers/hv/hv_kvp.c | 2 | ||||
-rw-r--r-- | drivers/hv/hv_snapshot.c | 2 | ||||
-rw-r--r-- | drivers/hv/hyperv_vmbus.h | 16 | ||||
-rw-r--r-- | drivers/hv/vmbus_drv.c | 31 | ||||
-rw-r--r-- | drivers/iommu/hyperv-iommu.c | 4 | ||||
-rw-r--r-- | drivers/uio/uio_hv_generic.c | 86 |
11 files changed, 139 insertions, 108 deletions
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index b39dee7b93af..f00019b078a7 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -23,7 +23,7 @@ #include <linux/acpi.h> #include <linux/hyperv.h> #include <clocksource/hyperv_timer.h> -#include <asm/hyperv-tlfs.h> +#include <hyperv/hvhdk.h> #include <asm/mshyperv.h> static struct clock_event_device __percpu *hv_clock_event; diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 3c6011a48dab..6e084c207414 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -944,16 +944,6 @@ void vmbus_initiate_unload(bool crash) vmbus_wait_for_unload(); } -static void check_ready_for_resume_event(void) -{ - /* - * If all the old primary channels have been fixed up, then it's safe - * to resume. - */ - if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume)) - complete(&vmbus_connection.ready_for_resume_event); -} - static void vmbus_setup_channel_state(struct vmbus_channel *channel, struct vmbus_channel_offer_channel *offer) { @@ -1109,8 +1099,6 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr) /* Add the channel back to the array of channels. */ vmbus_channel_map_relid(oldchannel); - check_ready_for_resume_event(); - mutex_unlock(&vmbus_connection.channel_mutex); return; } @@ -1296,13 +1284,28 @@ EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister); /* * vmbus_onoffers_delivered - - * This is invoked when all offers have been delivered. + * The CHANNELMSG_ALLOFFERS_DELIVERED message arrives after all + * boot-time offers are delivered. A boot-time offer is for the primary + * channel for any virtual hardware configured in the VM at the time it boots. + * Boot-time offers include offers for physical devices assigned to the VM + * via Hyper-V's Discrete Device Assignment (DDA) functionality that are + * handled as virtual PCI devices in Linux (e.g., NVMe devices and GPUs). + * Boot-time offers do not include offers for VMBus sub-channels. Because + * devices can be hot-added to the VM after it is booted, additional channel + * offers that aren't boot-time offers can be received at any time after the + * all-offers-delivered message. * - * Nothing to do here. + * SR-IOV NIC Virtual Functions (VFs) assigned to a VM are not considered + * to be assigned to the VM at boot-time, and offers for VFs may occur after + * the all-offers-delivered message. VFs are optional accelerators to the + * synthetic VMBus NIC and are effectively hot-added only after the VMBus + * NIC channel is opened (once it knows the guest can support it, via the + * sriov bit in the netvsc protocol). */ static void vmbus_onoffers_delivered( struct vmbus_channel_message_header *hdr) { + complete(&vmbus_connection.all_offers_delivered_event); } /* @@ -1578,7 +1581,8 @@ void vmbus_onmessage(struct vmbus_channel_message_header *hdr) } /* - * vmbus_request_offers - Send a request to get all our pending offers. + * vmbus_request_offers - Send a request to get all our pending offers + * and wait for all boot-time offers to arrive. */ int vmbus_request_offers(void) { @@ -1596,6 +1600,10 @@ int vmbus_request_offers(void) msg->msgtype = CHANNELMSG_REQUESTOFFERS; + /* + * This REQUESTOFFERS message will result in the host sending an all + * offers delivered message after all the boot-time offers are sent. + */ ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header), true); @@ -1607,6 +1615,29 @@ int vmbus_request_offers(void) goto cleanup; } + /* + * Wait for the host to send all boot-time offers. + * Keeping it as a best-effort mechanism, where a warning is + * printed if a timeout occurs, and execution is resumed. + */ + if (!wait_for_completion_timeout(&vmbus_connection.all_offers_delivered_event, + secs_to_jiffies(60))) { + pr_warn("timed out waiting for all boot-time offers to be delivered.\n"); + } + + /* + * Flush handling of offer messages (which may initiate work on + * other work queues). + */ + flush_workqueue(vmbus_connection.work_queue); + + /* + * Flush workqueue for processing the incoming offers. Subchannel + * offers and their processing can happen later, so there is no need to + * flush that workqueue here. + */ + flush_workqueue(vmbus_connection.handle_primary_chan_wq); + cleanup: kfree(msginfo); diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index f001ae880e1d..8351360bba16 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -34,8 +34,8 @@ struct vmbus_connection vmbus_connection = { .ready_for_suspend_event = COMPLETION_INITIALIZER( vmbus_connection.ready_for_suspend_event), - .ready_for_resume_event = COMPLETION_INITIALIZER( - vmbus_connection.ready_for_resume_event), + .all_offers_delivered_event = COMPLETION_INITIALIZER( + vmbus_connection.all_offers_delivered_event), }; EXPORT_SYMBOL_GPL(vmbus_connection); diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index a99112e6f0b8..fec2f18679e3 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -28,7 +28,7 @@ #include <linux/sizes.h> #include <linux/hyperv.h> -#include <asm/hyperv-tlfs.h> +#include <hyperv/hvhdk.h> #include <asm/mshyperv.h> @@ -766,16 +766,18 @@ static void hv_online_page(struct page *pg, unsigned int order) struct hv_hotadd_state *has; unsigned long pfn = page_to_pfn(pg); - guard(spinlock_irqsave)(&dm_device.ha_lock); - list_for_each_entry(has, &dm_device.ha_region_list, list) { - /* The page belongs to a different HAS. */ - if (pfn < has->start_pfn || - (pfn + (1UL << order) > has->end_pfn)) - continue; + scoped_guard(spinlock_irqsave, &dm_device.ha_lock) { + list_for_each_entry(has, &dm_device.ha_region_list, list) { + /* The page belongs to a different HAS. */ + if (pfn < has->start_pfn || + (pfn + (1UL << order) > has->end_pfn)) + continue; - hv_bring_pgs_online(has, pfn, 1UL << order); - break; + hv_bring_pgs_online(has, pfn, 1UL << order); + return; + } } + generic_online_page(pg, order); } static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) @@ -1586,7 +1588,7 @@ static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info, return -ENOSPC; } - hint->type = HV_EXT_MEMORY_HEAT_HINT_TYPE_COLD_DISCARD; + hint->heat_type = HV_EXTMEM_HEAT_HINT_COLD_DISCARD; hint->reserved = 0; for_each_sg(sgl, sg, nents, i) { union hv_gpa_page_range *range; diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 7a35c82976e0..af5d1dc451f6 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -28,7 +28,7 @@ #include <linux/slab.h> #include <linux/dma-map-ops.h> #include <linux/set_memory.h> -#include <asm/hyperv-tlfs.h> +#include <hyperv/hvhdk.h> #include <asm/mshyperv.h> /* @@ -278,6 +278,11 @@ static void hv_kmsg_dump_register(void) } } +static inline bool hv_output_page_exists(void) +{ + return hv_root_partition || IS_ENABLED(CONFIG_HYPERV_VTL_MODE); +} + int __init hv_common_init(void) { int i; @@ -340,19 +345,19 @@ int __init hv_common_init(void) BUG_ON(!hyperv_pcpu_input_arg); /* Allocate the per-CPU state for output arg for root */ - if (hv_root_partition) { + if (hv_output_page_exists()) { hyperv_pcpu_output_arg = alloc_percpu(void *); BUG_ON(!hyperv_pcpu_output_arg); } - hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), + hv_vp_index = kmalloc_array(nr_cpu_ids, sizeof(*hv_vp_index), GFP_KERNEL); if (!hv_vp_index) { hv_common_free(); return -ENOMEM; } - for (i = 0; i < num_possible_cpus(); i++) + for (i = 0; i < nr_cpu_ids; i++) hv_vp_index[i] = VP_INVAL; return 0; @@ -435,7 +440,7 @@ int hv_common_cpu_init(unsigned int cpu) void **inputarg, **outputarg; u64 msr_vp_index; gfp_t flags; - int pgcount = hv_root_partition ? 2 : 1; + const int pgcount = hv_output_page_exists() ? 2 : 1; void *mem; int ret; @@ -453,7 +458,7 @@ int hv_common_cpu_init(unsigned int cpu) if (!mem) return -ENOMEM; - if (hv_root_partition) { + if (hv_output_page_exists()) { outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); *outputarg = (char *)mem + HV_HYP_PAGE_SIZE; } diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index 7400a5a4d2bd..62795f6cbb00 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -27,7 +27,7 @@ #include <linux/connector.h> #include <linux/workqueue.h> #include <linux/hyperv.h> -#include <asm/hyperv-tlfs.h> +#include <hyperv/hvhdk.h> #include "hyperv_vmbus.h" #include "hv_utils_transport.h" diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c index bde637a96c37..2e7f537d53cf 100644 --- a/drivers/hv/hv_snapshot.c +++ b/drivers/hv/hv_snapshot.c @@ -12,7 +12,7 @@ #include <linux/connector.h> #include <linux/workqueue.h> #include <linux/hyperv.h> -#include <asm/hyperv-tlfs.h> +#include <hyperv/hvhdk.h> #include "hyperv_vmbus.h" #include "hv_utils_transport.h" diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 52cb744b4d7f..29780f3a7478 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -15,10 +15,10 @@ #include <linux/list.h> #include <linux/bitops.h> #include <asm/sync_bitops.h> -#include <asm/hyperv-tlfs.h> #include <linux/atomic.h> #include <linux/hyperv.h> #include <linux/interrupt.h> +#include <hyperv/hvhdk.h> #include "hv_trace.h" @@ -287,18 +287,10 @@ struct vmbus_connection { struct completion ready_for_suspend_event; /* - * The number of primary channels that should be "fixed up" - * upon resume: these channels are re-offered upon resume, and some - * fields of the channel offers (i.e. child_relid and connection_id) - * can change, so the old offermsg must be fixed up, before the resume - * callbacks of the VSC drivers start to further touch the channels. + * Completed once the host has offered all boot-time channels. + * Note that some channels may still be under process on a workqueue. */ - atomic_t nr_chan_fixup_on_resume; - /* - * vmbus_bus_resume() waits for "nr_chan_fixup_on_resume" to - * drop to zero. - */ - struct completion ready_for_resume_event; + struct completion all_offers_delivered_event; }; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 2892b8da20a5..0f6cd44fff29 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2427,11 +2427,6 @@ static int vmbus_bus_suspend(struct device *dev) if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0) wait_for_completion(&vmbus_connection.ready_for_suspend_event); - if (atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0) { - pr_err("Can not suspend due to a previous failed resuming\n"); - return -EBUSY; - } - mutex_lock(&vmbus_connection.channel_mutex); list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { @@ -2456,22 +2451,18 @@ static int vmbus_bus_suspend(struct device *dev) pr_err("Sub-channel not deleted!\n"); WARN_ON_ONCE(1); } - - atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume); } mutex_unlock(&vmbus_connection.channel_mutex); vmbus_initiate_unload(false); - /* Reset the event for the next resume. */ - reinit_completion(&vmbus_connection.ready_for_resume_event); - return 0; } static int vmbus_bus_resume(struct device *dev) { + struct vmbus_channel *channel; struct vmbus_channel_msginfo *msginfo; size_t msgsize; int ret; @@ -2502,13 +2493,23 @@ static int vmbus_bus_resume(struct device *dev) if (ret != 0) return ret; - WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0); - vmbus_request_offers(); - if (wait_for_completion_timeout( - &vmbus_connection.ready_for_resume_event, secs_to_jiffies(10)) == 0) - pr_err("Some vmbus device is missing after suspending?\n"); + mutex_lock(&vmbus_connection.channel_mutex); + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { + if (channel->offermsg.child_relid != INVALID_RELID) + continue; + + /* hvsock channels are not expected to be present. */ + if (is_hvsock_channel(channel)) + continue; + + pr_err("channel %pUl/%pUl not present after resume.\n", + &channel->offermsg.offer.if_type, + &channel->offermsg.offer.if_instance); + /* ToDo: Cleanup these channels here */ + } + mutex_unlock(&vmbus_connection.channel_mutex); /* Reset the event for the next suspend. */ reinit_completion(&vmbus_connection.ready_for_suspend_event); diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c index 8a5c17b97310..2a86aa5d54c6 100644 --- a/drivers/iommu/hyperv-iommu.c +++ b/drivers/iommu/hyperv-iommu.c @@ -164,8 +164,8 @@ static int __init hyperv_prepare_irq_remapping(void) * max cpu affinity for IOAPIC irqs. Scan cpu 0-255 and set cpu * into ioapic_max_cpumask if its APIC ID is less than 256. */ - for (i = min_t(unsigned int, num_possible_cpus() - 1, 255); i >= 0; i--) - if (cpu_physical_id(i) < 256) + for (i = min_t(unsigned int, nr_cpu_ids - 1, 255); i >= 0; i--) + if (cpu_possible(i) && cpu_physical_id(i) < 256) cpumask_set_cpu(i, &ioapic_max_cpumask); return 0; diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index 3976360d0096..1b19b5647495 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -296,51 +296,51 @@ hv_uio_probe(struct hv_device *dev, pdata->info.mem[MON_PAGE_MAP].size = PAGE_SIZE; pdata->info.mem[MON_PAGE_MAP].memtype = UIO_MEM_LOGICAL; - pdata->recv_buf = vzalloc(RECV_BUFFER_SIZE); - if (pdata->recv_buf == NULL) { - ret = -ENOMEM; - goto fail_free_ring; + if (channel->device_id == HV_NIC) { + pdata->recv_buf = vzalloc(RECV_BUFFER_SIZE); + if (!pdata->recv_buf) { + ret = -ENOMEM; + goto fail_free_ring; + } + + ret = vmbus_establish_gpadl(channel, pdata->recv_buf, + RECV_BUFFER_SIZE, &pdata->recv_gpadl); + if (ret) { + if (!pdata->recv_gpadl.decrypted) + vfree(pdata->recv_buf); + goto fail_close; + } + + /* put Global Physical Address Label in name */ + snprintf(pdata->recv_name, sizeof(pdata->recv_name), + "recv:%u", pdata->recv_gpadl.gpadl_handle); + pdata->info.mem[RECV_BUF_MAP].name = pdata->recv_name; + pdata->info.mem[RECV_BUF_MAP].addr = (uintptr_t)pdata->recv_buf; + pdata->info.mem[RECV_BUF_MAP].size = RECV_BUFFER_SIZE; + pdata->info.mem[RECV_BUF_MAP].memtype = UIO_MEM_VIRTUAL; + + pdata->send_buf = vzalloc(SEND_BUFFER_SIZE); + if (!pdata->send_buf) { + ret = -ENOMEM; + goto fail_close; + } + + ret = vmbus_establish_gpadl(channel, pdata->send_buf, + SEND_BUFFER_SIZE, &pdata->send_gpadl); + if (ret) { + if (!pdata->send_gpadl.decrypted) + vfree(pdata->send_buf); + goto fail_close; + } + + snprintf(pdata->send_name, sizeof(pdata->send_name), + "send:%u", pdata->send_gpadl.gpadl_handle); + pdata->info.mem[SEND_BUF_MAP].name = pdata->send_name; + pdata->info.mem[SEND_BUF_MAP].addr = (uintptr_t)pdata->send_buf; + pdata->info.mem[SEND_BUF_MAP].size = SEND_BUFFER_SIZE; + pdata->info.mem[SEND_BUF_MAP].memtype = UIO_MEM_VIRTUAL; } - ret = vmbus_establish_gpadl(channel, pdata->recv_buf, - RECV_BUFFER_SIZE, &pdata->recv_gpadl); - if (ret) { - if (!pdata->recv_gpadl.decrypted) - vfree(pdata->recv_buf); - goto fail_close; - } - - /* put Global Physical Address Label in name */ - snprintf(pdata->recv_name, sizeof(pdata->recv_name), - "recv:%u", pdata->recv_gpadl.gpadl_handle); - pdata->info.mem[RECV_BUF_MAP].name = pdata->recv_name; - pdata->info.mem[RECV_BUF_MAP].addr - = (uintptr_t)pdata->recv_buf; - pdata->info.mem[RECV_BUF_MAP].size = RECV_BUFFER_SIZE; - pdata->info.mem[RECV_BUF_MAP].memtype = UIO_MEM_VIRTUAL; - - pdata->send_buf = vzalloc(SEND_BUFFER_SIZE); - if (pdata->send_buf == NULL) { - ret = -ENOMEM; - goto fail_close; - } - - ret = vmbus_establish_gpadl(channel, pdata->send_buf, - SEND_BUFFER_SIZE, &pdata->send_gpadl); - if (ret) { - if (!pdata->send_gpadl.decrypted) - vfree(pdata->send_buf); - goto fail_close; - } - - snprintf(pdata->send_name, sizeof(pdata->send_name), - "send:%u", pdata->send_gpadl.gpadl_handle); - pdata->info.mem[SEND_BUF_MAP].name = pdata->send_name; - pdata->info.mem[SEND_BUF_MAP].addr - = (uintptr_t)pdata->send_buf; - pdata->info.mem[SEND_BUF_MAP].size = SEND_BUFFER_SIZE; - pdata->info.mem[SEND_BUF_MAP].memtype = UIO_MEM_VIRTUAL; - pdata->info.priv = pdata; pdata->device = dev; |