summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-01-25 09:22:55 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-01-25 09:22:55 -0800
commit382e391365ca12d1e5a15f109ba8b4609d58db6b (patch)
tree82f10864a9e2898a811ae46b60fcf765456e37ab /drivers
parentb46c89c08f4146e7987fc355941a93b12e2c03ef (diff)
parent2e03358be78b65d28b66e17aca9e0c8700b0df78 (diff)
Merge tag 'hyperv-next-signed-20250123' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux
Pull hyperv updates from Wei Liu: - Introduce a new set of Hyper-V headers in include/hyperv and replace the old hyperv-tlfs.h with the new headers (Nuno Das Neves) - Fixes for the Hyper-V VTL mode (Roman Kisel) - Fixes for cpu mask usage in Hyper-V code (Michael Kelley) - Document the guest VM hibernation behaviour (Michael Kelley) - Miscellaneous fixes and cleanups (Jacob Pan, John Starks, Naman Jain) * tag 'hyperv-next-signed-20250123' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: Documentation: hyperv: Add overview of guest VM hibernation hyperv: Do not overlap the hvcall IO areas in hv_vtl_apicid_to_vp_id() hyperv: Do not overlap the hvcall IO areas in get_vtl() hyperv: Enable the hypercall output page for the VTL mode hv_balloon: Fallback to generic_online_page() for non-HV hot added mem Drivers: hv: vmbus: Log on missing offers if any Drivers: hv: vmbus: Wait for boot-time offers during boot and resume uio_hv_generic: Add a check for HV_NIC for send, receive buffers setup iommu/hyper-v: Don't assume cpu_possible_mask is dense Drivers: hv: Don't assume cpu_possible_mask is dense x86/hyperv: Don't assume cpu_possible_mask is dense hyperv: Remove the now unused hyperv-tlfs.h files hyperv: Switch from hyperv-tlfs.h to hyperv/hvhdk.h hyperv: Add new Hyper-V headers in include/hyperv hyperv: Clean up unnecessary #includes hyperv: Move hv_connection_id to hyperv-tlfs.h
Diffstat (limited to 'drivers')
-rw-r--r--drivers/clocksource/hyperv_timer.c2
-rw-r--r--drivers/hv/channel_mgmt.c61
-rw-r--r--drivers/hv/connection.c4
-rw-r--r--drivers/hv/hv_balloon.c22
-rw-r--r--drivers/hv/hv_common.c17
-rw-r--r--drivers/hv/hv_kvp.c2
-rw-r--r--drivers/hv/hv_snapshot.c2
-rw-r--r--drivers/hv/hyperv_vmbus.h16
-rw-r--r--drivers/hv/vmbus_drv.c31
-rw-r--r--drivers/iommu/hyperv-iommu.c4
-rw-r--r--drivers/uio/uio_hv_generic.c86
11 files changed, 139 insertions, 108 deletions
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index b39dee7b93af..f00019b078a7 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -23,7 +23,7 @@
#include <linux/acpi.h>
#include <linux/hyperv.h>
#include <clocksource/hyperv_timer.h>
-#include <asm/hyperv-tlfs.h>
+#include <hyperv/hvhdk.h>
#include <asm/mshyperv.h>
static struct clock_event_device __percpu *hv_clock_event;
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 3c6011a48dab..6e084c207414 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -944,16 +944,6 @@ void vmbus_initiate_unload(bool crash)
vmbus_wait_for_unload();
}
-static void check_ready_for_resume_event(void)
-{
- /*
- * If all the old primary channels have been fixed up, then it's safe
- * to resume.
- */
- if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
- complete(&vmbus_connection.ready_for_resume_event);
-}
-
static void vmbus_setup_channel_state(struct vmbus_channel *channel,
struct vmbus_channel_offer_channel *offer)
{
@@ -1109,8 +1099,6 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
/* Add the channel back to the array of channels. */
vmbus_channel_map_relid(oldchannel);
- check_ready_for_resume_event();
-
mutex_unlock(&vmbus_connection.channel_mutex);
return;
}
@@ -1296,13 +1284,28 @@ EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);
/*
* vmbus_onoffers_delivered -
- * This is invoked when all offers have been delivered.
+ * The CHANNELMSG_ALLOFFERS_DELIVERED message arrives after all
+ * boot-time offers are delivered. A boot-time offer is for the primary
+ * channel for any virtual hardware configured in the VM at the time it boots.
+ * Boot-time offers include offers for physical devices assigned to the VM
+ * via Hyper-V's Discrete Device Assignment (DDA) functionality that are
+ * handled as virtual PCI devices in Linux (e.g., NVMe devices and GPUs).
+ * Boot-time offers do not include offers for VMBus sub-channels. Because
+ * devices can be hot-added to the VM after it is booted, additional channel
+ * offers that aren't boot-time offers can be received at any time after the
+ * all-offers-delivered message.
*
- * Nothing to do here.
+ * SR-IOV NIC Virtual Functions (VFs) assigned to a VM are not considered
+ * to be assigned to the VM at boot-time, and offers for VFs may occur after
+ * the all-offers-delivered message. VFs are optional accelerators to the
+ * synthetic VMBus NIC and are effectively hot-added only after the VMBus
+ * NIC channel is opened (once it knows the guest can support it, via the
+ * sriov bit in the netvsc protocol).
*/
static void vmbus_onoffers_delivered(
struct vmbus_channel_message_header *hdr)
{
+ complete(&vmbus_connection.all_offers_delivered_event);
}
/*
@@ -1578,7 +1581,8 @@ void vmbus_onmessage(struct vmbus_channel_message_header *hdr)
}
/*
- * vmbus_request_offers - Send a request to get all our pending offers.
+ * vmbus_request_offers - Send a request to get all our pending offers
+ * and wait for all boot-time offers to arrive.
*/
int vmbus_request_offers(void)
{
@@ -1596,6 +1600,10 @@ int vmbus_request_offers(void)
msg->msgtype = CHANNELMSG_REQUESTOFFERS;
+ /*
+ * This REQUESTOFFERS message will result in the host sending an all
+ * offers delivered message after all the boot-time offers are sent.
+ */
ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header),
true);
@@ -1607,6 +1615,29 @@ int vmbus_request_offers(void)
goto cleanup;
}
+ /*
+ * Wait for the host to send all boot-time offers.
+ * Keeping it as a best-effort mechanism, where a warning is
+ * printed if a timeout occurs, and execution is resumed.
+ */
+ if (!wait_for_completion_timeout(&vmbus_connection.all_offers_delivered_event,
+ secs_to_jiffies(60))) {
+ pr_warn("timed out waiting for all boot-time offers to be delivered.\n");
+ }
+
+ /*
+ * Flush handling of offer messages (which may initiate work on
+ * other work queues).
+ */
+ flush_workqueue(vmbus_connection.work_queue);
+
+ /*
+ * Flush workqueue for processing the incoming offers. Subchannel
+ * offers and their processing can happen later, so there is no need to
+ * flush that workqueue here.
+ */
+ flush_workqueue(vmbus_connection.handle_primary_chan_wq);
+
cleanup:
kfree(msginfo);
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index f001ae880e1d..8351360bba16 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -34,8 +34,8 @@ struct vmbus_connection vmbus_connection = {
.ready_for_suspend_event = COMPLETION_INITIALIZER(
vmbus_connection.ready_for_suspend_event),
- .ready_for_resume_event = COMPLETION_INITIALIZER(
- vmbus_connection.ready_for_resume_event),
+ .all_offers_delivered_event = COMPLETION_INITIALIZER(
+ vmbus_connection.all_offers_delivered_event),
};
EXPORT_SYMBOL_GPL(vmbus_connection);
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index a99112e6f0b8..fec2f18679e3 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -28,7 +28,7 @@
#include <linux/sizes.h>
#include <linux/hyperv.h>
-#include <asm/hyperv-tlfs.h>
+#include <hyperv/hvhdk.h>
#include <asm/mshyperv.h>
@@ -766,16 +766,18 @@ static void hv_online_page(struct page *pg, unsigned int order)
struct hv_hotadd_state *has;
unsigned long pfn = page_to_pfn(pg);
- guard(spinlock_irqsave)(&dm_device.ha_lock);
- list_for_each_entry(has, &dm_device.ha_region_list, list) {
- /* The page belongs to a different HAS. */
- if (pfn < has->start_pfn ||
- (pfn + (1UL << order) > has->end_pfn))
- continue;
+ scoped_guard(spinlock_irqsave, &dm_device.ha_lock) {
+ list_for_each_entry(has, &dm_device.ha_region_list, list) {
+ /* The page belongs to a different HAS. */
+ if (pfn < has->start_pfn ||
+ (pfn + (1UL << order) > has->end_pfn))
+ continue;
- hv_bring_pgs_online(has, pfn, 1UL << order);
- break;
+ hv_bring_pgs_online(has, pfn, 1UL << order);
+ return;
+ }
}
+ generic_online_page(pg, order);
}
static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
@@ -1586,7 +1588,7 @@ static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info,
return -ENOSPC;
}
- hint->type = HV_EXT_MEMORY_HEAT_HINT_TYPE_COLD_DISCARD;
+ hint->heat_type = HV_EXTMEM_HEAT_HINT_COLD_DISCARD;
hint->reserved = 0;
for_each_sg(sgl, sg, nents, i) {
union hv_gpa_page_range *range;
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 7a35c82976e0..af5d1dc451f6 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -28,7 +28,7 @@
#include <linux/slab.h>
#include <linux/dma-map-ops.h>
#include <linux/set_memory.h>
-#include <asm/hyperv-tlfs.h>
+#include <hyperv/hvhdk.h>
#include <asm/mshyperv.h>
/*
@@ -278,6 +278,11 @@ static void hv_kmsg_dump_register(void)
}
}
+static inline bool hv_output_page_exists(void)
+{
+ return hv_root_partition || IS_ENABLED(CONFIG_HYPERV_VTL_MODE);
+}
+
int __init hv_common_init(void)
{
int i;
@@ -340,19 +345,19 @@ int __init hv_common_init(void)
BUG_ON(!hyperv_pcpu_input_arg);
/* Allocate the per-CPU state for output arg for root */
- if (hv_root_partition) {
+ if (hv_output_page_exists()) {
hyperv_pcpu_output_arg = alloc_percpu(void *);
BUG_ON(!hyperv_pcpu_output_arg);
}
- hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
+ hv_vp_index = kmalloc_array(nr_cpu_ids, sizeof(*hv_vp_index),
GFP_KERNEL);
if (!hv_vp_index) {
hv_common_free();
return -ENOMEM;
}
- for (i = 0; i < num_possible_cpus(); i++)
+ for (i = 0; i < nr_cpu_ids; i++)
hv_vp_index[i] = VP_INVAL;
return 0;
@@ -435,7 +440,7 @@ int hv_common_cpu_init(unsigned int cpu)
void **inputarg, **outputarg;
u64 msr_vp_index;
gfp_t flags;
- int pgcount = hv_root_partition ? 2 : 1;
+ const int pgcount = hv_output_page_exists() ? 2 : 1;
void *mem;
int ret;
@@ -453,7 +458,7 @@ int hv_common_cpu_init(unsigned int cpu)
if (!mem)
return -ENOMEM;
- if (hv_root_partition) {
+ if (hv_output_page_exists()) {
outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
*outputarg = (char *)mem + HV_HYP_PAGE_SIZE;
}
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index 7400a5a4d2bd..62795f6cbb00 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -27,7 +27,7 @@
#include <linux/connector.h>
#include <linux/workqueue.h>
#include <linux/hyperv.h>
-#include <asm/hyperv-tlfs.h>
+#include <hyperv/hvhdk.h>
#include "hyperv_vmbus.h"
#include "hv_utils_transport.h"
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index bde637a96c37..2e7f537d53cf 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -12,7 +12,7 @@
#include <linux/connector.h>
#include <linux/workqueue.h>
#include <linux/hyperv.h>
-#include <asm/hyperv-tlfs.h>
+#include <hyperv/hvhdk.h>
#include "hyperv_vmbus.h"
#include "hv_utils_transport.h"
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 52cb744b4d7f..29780f3a7478 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -15,10 +15,10 @@
#include <linux/list.h>
#include <linux/bitops.h>
#include <asm/sync_bitops.h>
-#include <asm/hyperv-tlfs.h>
#include <linux/atomic.h>
#include <linux/hyperv.h>
#include <linux/interrupt.h>
+#include <hyperv/hvhdk.h>
#include "hv_trace.h"
@@ -287,18 +287,10 @@ struct vmbus_connection {
struct completion ready_for_suspend_event;
/*
- * The number of primary channels that should be "fixed up"
- * upon resume: these channels are re-offered upon resume, and some
- * fields of the channel offers (i.e. child_relid and connection_id)
- * can change, so the old offermsg must be fixed up, before the resume
- * callbacks of the VSC drivers start to further touch the channels.
+ * Completed once the host has offered all boot-time channels.
+ * Note that some channels may still be under process on a workqueue.
*/
- atomic_t nr_chan_fixup_on_resume;
- /*
- * vmbus_bus_resume() waits for "nr_chan_fixup_on_resume" to
- * drop to zero.
- */
- struct completion ready_for_resume_event;
+ struct completion all_offers_delivered_event;
};
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 2892b8da20a5..0f6cd44fff29 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2427,11 +2427,6 @@ static int vmbus_bus_suspend(struct device *dev)
if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
wait_for_completion(&vmbus_connection.ready_for_suspend_event);
- if (atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0) {
- pr_err("Can not suspend due to a previous failed resuming\n");
- return -EBUSY;
- }
-
mutex_lock(&vmbus_connection.channel_mutex);
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
@@ -2456,22 +2451,18 @@ static int vmbus_bus_suspend(struct device *dev)
pr_err("Sub-channel not deleted!\n");
WARN_ON_ONCE(1);
}
-
- atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
}
mutex_unlock(&vmbus_connection.channel_mutex);
vmbus_initiate_unload(false);
- /* Reset the event for the next resume. */
- reinit_completion(&vmbus_connection.ready_for_resume_event);
-
return 0;
}
static int vmbus_bus_resume(struct device *dev)
{
+ struct vmbus_channel *channel;
struct vmbus_channel_msginfo *msginfo;
size_t msgsize;
int ret;
@@ -2502,13 +2493,23 @@ static int vmbus_bus_resume(struct device *dev)
if (ret != 0)
return ret;
- WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0);
-
vmbus_request_offers();
- if (wait_for_completion_timeout(
- &vmbus_connection.ready_for_resume_event, secs_to_jiffies(10)) == 0)
- pr_err("Some vmbus device is missing after suspending?\n");
+ mutex_lock(&vmbus_connection.channel_mutex);
+ list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+ if (channel->offermsg.child_relid != INVALID_RELID)
+ continue;
+
+ /* hvsock channels are not expected to be present. */
+ if (is_hvsock_channel(channel))
+ continue;
+
+ pr_err("channel %pUl/%pUl not present after resume.\n",
+ &channel->offermsg.offer.if_type,
+ &channel->offermsg.offer.if_instance);
+ /* ToDo: Cleanup these channels here */
+ }
+ mutex_unlock(&vmbus_connection.channel_mutex);
/* Reset the event for the next suspend. */
reinit_completion(&vmbus_connection.ready_for_suspend_event);
diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
index 8a5c17b97310..2a86aa5d54c6 100644
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv-iommu.c
@@ -164,8 +164,8 @@ static int __init hyperv_prepare_irq_remapping(void)
* max cpu affinity for IOAPIC irqs. Scan cpu 0-255 and set cpu
* into ioapic_max_cpumask if its APIC ID is less than 256.
*/
- for (i = min_t(unsigned int, num_possible_cpus() - 1, 255); i >= 0; i--)
- if (cpu_physical_id(i) < 256)
+ for (i = min_t(unsigned int, nr_cpu_ids - 1, 255); i >= 0; i--)
+ if (cpu_possible(i) && cpu_physical_id(i) < 256)
cpumask_set_cpu(i, &ioapic_max_cpumask);
return 0;
diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index 3976360d0096..1b19b5647495 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -296,51 +296,51 @@ hv_uio_probe(struct hv_device *dev,
pdata->info.mem[MON_PAGE_MAP].size = PAGE_SIZE;
pdata->info.mem[MON_PAGE_MAP].memtype = UIO_MEM_LOGICAL;
- pdata->recv_buf = vzalloc(RECV_BUFFER_SIZE);
- if (pdata->recv_buf == NULL) {
- ret = -ENOMEM;
- goto fail_free_ring;
+ if (channel->device_id == HV_NIC) {
+ pdata->recv_buf = vzalloc(RECV_BUFFER_SIZE);
+ if (!pdata->recv_buf) {
+ ret = -ENOMEM;
+ goto fail_free_ring;
+ }
+
+ ret = vmbus_establish_gpadl(channel, pdata->recv_buf,
+ RECV_BUFFER_SIZE, &pdata->recv_gpadl);
+ if (ret) {
+ if (!pdata->recv_gpadl.decrypted)
+ vfree(pdata->recv_buf);
+ goto fail_close;
+ }
+
+ /* put Global Physical Address Label in name */
+ snprintf(pdata->recv_name, sizeof(pdata->recv_name),
+ "recv:%u", pdata->recv_gpadl.gpadl_handle);
+ pdata->info.mem[RECV_BUF_MAP].name = pdata->recv_name;
+ pdata->info.mem[RECV_BUF_MAP].addr = (uintptr_t)pdata->recv_buf;
+ pdata->info.mem[RECV_BUF_MAP].size = RECV_BUFFER_SIZE;
+ pdata->info.mem[RECV_BUF_MAP].memtype = UIO_MEM_VIRTUAL;
+
+ pdata->send_buf = vzalloc(SEND_BUFFER_SIZE);
+ if (!pdata->send_buf) {
+ ret = -ENOMEM;
+ goto fail_close;
+ }
+
+ ret = vmbus_establish_gpadl(channel, pdata->send_buf,
+ SEND_BUFFER_SIZE, &pdata->send_gpadl);
+ if (ret) {
+ if (!pdata->send_gpadl.decrypted)
+ vfree(pdata->send_buf);
+ goto fail_close;
+ }
+
+ snprintf(pdata->send_name, sizeof(pdata->send_name),
+ "send:%u", pdata->send_gpadl.gpadl_handle);
+ pdata->info.mem[SEND_BUF_MAP].name = pdata->send_name;
+ pdata->info.mem[SEND_BUF_MAP].addr = (uintptr_t)pdata->send_buf;
+ pdata->info.mem[SEND_BUF_MAP].size = SEND_BUFFER_SIZE;
+ pdata->info.mem[SEND_BUF_MAP].memtype = UIO_MEM_VIRTUAL;
}
- ret = vmbus_establish_gpadl(channel, pdata->recv_buf,
- RECV_BUFFER_SIZE, &pdata->recv_gpadl);
- if (ret) {
- if (!pdata->recv_gpadl.decrypted)
- vfree(pdata->recv_buf);
- goto fail_close;
- }
-
- /* put Global Physical Address Label in name */
- snprintf(pdata->recv_name, sizeof(pdata->recv_name),
- "recv:%u", pdata->recv_gpadl.gpadl_handle);
- pdata->info.mem[RECV_BUF_MAP].name = pdata->recv_name;
- pdata->info.mem[RECV_BUF_MAP].addr
- = (uintptr_t)pdata->recv_buf;
- pdata->info.mem[RECV_BUF_MAP].size = RECV_BUFFER_SIZE;
- pdata->info.mem[RECV_BUF_MAP].memtype = UIO_MEM_VIRTUAL;
-
- pdata->send_buf = vzalloc(SEND_BUFFER_SIZE);
- if (pdata->send_buf == NULL) {
- ret = -ENOMEM;
- goto fail_close;
- }
-
- ret = vmbus_establish_gpadl(channel, pdata->send_buf,
- SEND_BUFFER_SIZE, &pdata->send_gpadl);
- if (ret) {
- if (!pdata->send_gpadl.decrypted)
- vfree(pdata->send_buf);
- goto fail_close;
- }
-
- snprintf(pdata->send_name, sizeof(pdata->send_name),
- "send:%u", pdata->send_gpadl.gpadl_handle);
- pdata->info.mem[SEND_BUF_MAP].name = pdata->send_name;
- pdata->info.mem[SEND_BUF_MAP].addr
- = (uintptr_t)pdata->send_buf;
- pdata->info.mem[SEND_BUF_MAP].size = SEND_BUFFER_SIZE;
- pdata->info.mem[SEND_BUF_MAP].memtype = UIO_MEM_VIRTUAL;
-
pdata->info.priv = pdata;
pdata->device = dev;