summary | refs | log | tree | commit | diff
path: root/drivers/hv/vmbus_drv.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/hv/vmbus_drv.c')
-rw-r--r-- drivers/hv/vmbus_drv.c | 881
1 files changed, 520 insertions, 361 deletions
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 3146710d4ac6..a53af6fe81a6 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -12,6 +12,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
+#include <linux/platform_device.h>
#include <linux/interrupt.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
@@ -19,6 +20,7 @@
#include <linux/completion.h>
#include <linux/hyperv.h>
#include <linux/kernel_stat.h>
+#include <linux/of_address.h>
#include <linux/clockchips.h>
#include <linux/cpu.h>
#include <linux/sched/isolation.h>
@@ -28,13 +30,13 @@
#include <linux/panic_notifier.h>
#include <linux/ptrace.h>
#include <linux/screen_info.h>
-#include <linux/kdebug.h>
#include <linux/efi.h>
#include <linux/random.h>
#include <linux/kernel.h>
#include <linux/syscore_ops.h>
#include <linux/dma-map-ops.h>
#include <linux/pci.h>
+#include <linux/export.h>
#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"
@@ -44,12 +46,11 @@ struct vmbus_dynid {
struct hv_vmbus_device_id id;
};
-static struct acpi_device *hv_acpi_dev;
+/* VMBus Root Device */
+static struct device *vmbus_root_device;
static int hyperv_cpuhp_online;
-static void *hv_panic_page;
-
static long __percpu *vmbus_evt;
/* Values parsed from ACPI DSDT */
@@ -57,16 +58,16 @@ int vmbus_irq;
int vmbus_interrupt;
/*
- * Boolean to control whether to report panic messages over Hyper-V.
- *
- * It can be set via /proc/sys/kernel/hyperv_record_panic_msg
+ * If the Confidential VMBus is used, the data on the "wire" is not
+ * visible to either the host or the hypervisor.
*/
-static int sysctl_record_panic_msg = 1;
+static bool is_confidential;
-static int hyperv_report_reg(void)
+/*
+ * vmbus_is_confidential - report whether the Confidential VMBus is in use.
+ *
+ * Returns the file-local is_confidential flag. Exported (GPL-only) so that
+ * code outside this file can query it.
+ */
+bool vmbus_is_confidential(void)
{
- return !sysctl_record_panic_msg || !hv_panic_page;
+ return is_confidential;
}
+EXPORT_SYMBOL_GPL(vmbus_is_confidential);
/*
* The panic notifier below is responsible solely for unloading the
@@ -88,62 +89,20 @@ static struct notifier_block hyperv_panic_vmbus_unload_block = {
.priority = INT_MIN + 1, /* almost the latest one to execute */
};
-static int hv_die_panic_notify_crash(struct notifier_block *self,
- unsigned long val, void *args);
-
-static struct notifier_block hyperv_die_report_block = {
- .notifier_call = hv_die_panic_notify_crash,
-};
-static struct notifier_block hyperv_panic_report_block = {
- .notifier_call = hv_die_panic_notify_crash,
-};
-
-/*
- * The following callback works both as die and panic notifier; its
- * goal is to provide panic information to the hypervisor unless the
- * kmsg dumper is used [see hv_kmsg_dump()], which provides more
- * information but isn't always available.
- *
- * Notice that both the panic/die report notifiers are registered only
- * if we have the capability HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE set.
- */
-static int hv_die_panic_notify_crash(struct notifier_block *self,
- unsigned long val, void *args)
-{
- struct pt_regs *regs;
- bool is_die;
-
- /* Don't notify Hyper-V unless we have a die oops event or panic. */
- if (self == &hyperv_panic_report_block) {
- is_die = false;
- regs = current_pt_regs();
- } else { /* die event */
- if (val != DIE_OOPS)
- return NOTIFY_DONE;
-
- is_die = true;
- regs = ((struct die_args *)args)->regs;
- }
-
- /*
- * Hyper-V should be notified only once about a panic/die. If we will
- * be calling hv_kmsg_dump() later with kmsg data, don't do the
- * notification here.
- */
- if (hyperv_report_reg())
- hyperv_report_panic(regs, val, is_die);
-
- return NOTIFY_DONE;
-}
-
static const char *fb_mmio_name = "fb_range";
static struct resource *fb_mmio;
static struct resource *hyperv_mmio;
static DEFINE_MUTEX(hyperv_mmio_lock);
+/*
+ * hv_get_vmbus_root_device - return the VMBus root device.
+ *
+ * Returns the file-local vmbus_root_device pointer unchanged. The pointer may
+ * be NULL; vmbus_exists() treats NULL as "no VMBus" and returns -ENODEV.
+ */
+struct device *hv_get_vmbus_root_device(void)
+{
+ return vmbus_root_device;
+}
+EXPORT_SYMBOL_GPL(hv_get_vmbus_root_device);
+
static int vmbus_exists(void)
{
- if (hv_acpi_dev == NULL)
+ if (vmbus_root_device == NULL)
return -ENODEV;
return 0;
@@ -192,7 +151,7 @@ static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
+ return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
}
static DEVICE_ATTR_RO(id);
@@ -203,7 +162,7 @@ static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", hv_dev->channel->state);
+ return sysfs_emit(buf, "%d\n", hv_dev->channel->state);
}
static DEVICE_ATTR_RO(state);
@@ -214,7 +173,7 @@ static ssize_t monitor_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
+ return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
}
static DEVICE_ATTR_RO(monitor_id);
@@ -225,8 +184,8 @@ static ssize_t class_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "{%pUl}\n",
- &hv_dev->channel->offermsg.offer.if_type);
+ return sysfs_emit(buf, "{%pUl}\n",
+ &hv_dev->channel->offermsg.offer.if_type);
}
static DEVICE_ATTR_RO(class_id);
@@ -237,8 +196,8 @@ static ssize_t device_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "{%pUl}\n",
- &hv_dev->channel->offermsg.offer.if_instance);
+ return sysfs_emit(buf, "{%pUl}\n",
+ &hv_dev->channel->offermsg.offer.if_instance);
}
static DEVICE_ATTR_RO(device_id);
@@ -247,7 +206,7 @@ static ssize_t modalias_show(struct device *dev,
{
struct hv_device *hv_dev = device_to_hv_device(dev);
- return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
+ return sysfs_emit(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
}
static DEVICE_ATTR_RO(modalias);
@@ -260,7 +219,7 @@ static ssize_t numa_node_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
+ return sysfs_emit(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
}
static DEVICE_ATTR_RO(numa_node);
#endif
@@ -273,9 +232,8 @@ static ssize_t server_monitor_pending_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_pending(hv_dev->channel,
- vmbus_connection.monitor_pages[0]));
+ return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel,
+ vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_pending);
@@ -287,9 +245,8 @@ static ssize_t client_monitor_pending_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_pending(hv_dev->channel,
- vmbus_connection.monitor_pages[1]));
+ return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel,
+ vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_pending);
@@ -301,9 +258,8 @@ static ssize_t server_monitor_latency_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_latency(hv_dev->channel,
- vmbus_connection.monitor_pages[0]));
+ return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel,
+ vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_latency);
@@ -315,9 +271,8 @@ static ssize_t client_monitor_latency_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_latency(hv_dev->channel,
- vmbus_connection.monitor_pages[1]));
+ return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel,
+ vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_latency);
@@ -329,9 +284,8 @@ static ssize_t server_monitor_conn_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_conn_id(hv_dev->channel,
- vmbus_connection.monitor_pages[0]));
+ return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel,
+ vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_conn_id);
@@ -343,9 +297,8 @@ static ssize_t client_monitor_conn_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_conn_id(hv_dev->channel,
- vmbus_connection.monitor_pages[1]));
+ return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel,
+ vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_conn_id);
@@ -364,7 +317,7 @@ static ssize_t out_intr_mask_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
+ return sysfs_emit(buf, "%d\n", outbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(out_intr_mask);
@@ -382,7 +335,7 @@ static ssize_t out_read_index_show(struct device *dev,
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.current_read_index);
+ return sysfs_emit(buf, "%u\n", outbound.current_read_index);
}
static DEVICE_ATTR_RO(out_read_index);
@@ -401,7 +354,7 @@ static ssize_t out_write_index_show(struct device *dev,
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.current_write_index);
+ return sysfs_emit(buf, "%u\n", outbound.current_write_index);
}
static DEVICE_ATTR_RO(out_write_index);
@@ -420,7 +373,7 @@ static ssize_t out_read_bytes_avail_show(struct device *dev,
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
+ return sysfs_emit(buf, "%d\n", outbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(out_read_bytes_avail);
@@ -439,7 +392,7 @@ static ssize_t out_write_bytes_avail_show(struct device *dev,
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
+ return sysfs_emit(buf, "%d\n", outbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(out_write_bytes_avail);
@@ -457,7 +410,7 @@ static ssize_t in_intr_mask_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
+ return sysfs_emit(buf, "%d\n", inbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(in_intr_mask);
@@ -475,7 +428,7 @@ static ssize_t in_read_index_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.current_read_index);
+ return sysfs_emit(buf, "%d\n", inbound.current_read_index);
}
static DEVICE_ATTR_RO(in_read_index);
@@ -493,7 +446,7 @@ static ssize_t in_write_index_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.current_write_index);
+ return sysfs_emit(buf, "%d\n", inbound.current_write_index);
}
static DEVICE_ATTR_RO(in_write_index);
@@ -512,7 +465,7 @@ static ssize_t in_read_bytes_avail_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
+ return sysfs_emit(buf, "%d\n", inbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(in_read_bytes_avail);
@@ -531,7 +484,7 @@ static ssize_t in_write_bytes_avail_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
+ return sysfs_emit(buf, "%d\n", inbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(in_write_bytes_avail);
@@ -541,7 +494,7 @@ static ssize_t channel_vp_mapping_show(struct device *dev,
{
struct hv_device *hv_dev = device_to_hv_device(dev);
struct vmbus_channel *channel = hv_dev->channel, *cur_sc;
- int buf_size = PAGE_SIZE, n_written, tot_written;
+ int n_written;
struct list_head *cur;
if (!channel)
@@ -549,25 +502,21 @@ static ssize_t channel_vp_mapping_show(struct device *dev,
mutex_lock(&vmbus_connection.channel_mutex);
- tot_written = snprintf(buf, buf_size, "%u:%u\n",
- channel->offermsg.child_relid, channel->target_cpu);
+ n_written = sysfs_emit(buf, "%u:%u\n",
+ channel->offermsg.child_relid,
+ channel->target_cpu);
list_for_each(cur, &channel->sc_list) {
- if (tot_written >= buf_size - 1)
- break;
cur_sc = list_entry(cur, struct vmbus_channel, sc_list);
- n_written = scnprintf(buf + tot_written,
- buf_size - tot_written,
- "%u:%u\n",
- cur_sc->offermsg.child_relid,
- cur_sc->target_cpu);
- tot_written += n_written;
+ n_written += sysfs_emit_at(buf, n_written, "%u:%u\n",
+ cur_sc->offermsg.child_relid,
+ cur_sc->target_cpu);
}
mutex_unlock(&vmbus_connection.channel_mutex);
- return tot_written;
+ return n_written;
}
static DEVICE_ATTR_RO(channel_vp_mapping);
@@ -577,7 +526,7 @@ static ssize_t vendor_show(struct device *dev,
{
struct hv_device *hv_dev = device_to_hv_device(dev);
- return sprintf(buf, "0x%x\n", hv_dev->vendor_id);
+ return sysfs_emit(buf, "0x%x\n", hv_dev->vendor_id);
}
static DEVICE_ATTR_RO(vendor);
@@ -587,7 +536,7 @@ static ssize_t device_show(struct device *dev,
{
struct hv_device *hv_dev = device_to_hv_device(dev);
- return sprintf(buf, "0x%x\n", hv_dev->device_id);
+ return sysfs_emit(buf, "0x%x\n", hv_dev->device_id);
}
static DEVICE_ATTR_RO(device);
@@ -612,7 +561,7 @@ static ssize_t driver_override_show(struct device *dev,
ssize_t len;
device_lock(dev);
- len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override);
+ len = sysfs_emit(buf, "%s\n", hv_dev->driver_override);
device_unlock(dev);
return len;
@@ -684,7 +633,7 @@ static const struct attribute_group vmbus_dev_group = {
__ATTRIBUTE_GROUPS(vmbus_dev);
/* Set up the attribute for /sys/bus/vmbus/hibernation */
-static ssize_t hibernation_show(struct bus_type *bus, char *buf)
+/*
+ * Emits "1\n" when hv_is_hibernation_supported() returns non-zero and "0\n"
+ * otherwise. Uses sysfs_emit() like the other show() callbacks in this file,
+ * so the write is bounded by the sysfs page buffer.
+ */
+static ssize_t hibernation_show(const struct bus_type *bus, char *buf)
{
- return sprintf(buf, "%d\n", !!hv_is_hibernation_supported());
+ return sysfs_emit(buf, "%d\n", !!hv_is_hibernation_supported());
}
@@ -711,9 +660,9 @@ __ATTRIBUTE_GROUPS(vmbus_bus);
* representation of the device guid (each byte of the guid will be
* represented with two hex characters.
*/
-static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env)
+static int vmbus_uevent(const struct device *device, struct kobj_uevent_env *env)
{
- struct hv_device *dev = device_to_hv_device(device);
+ const struct hv_device *dev = device_to_hv_device(device);
const char *format = "MODALIAS=vmbus:%*phN";
return add_uevent_var(env, format, UUID_SIZE, &dev->dev_type);
@@ -756,7 +705,7 @@ static const struct hv_vmbus_device_id vmbus_device_null;
* Return a matching hv_vmbus_device_id pointer.
* If there is no match, return NULL.
*/
-static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
+static const struct hv_vmbus_device_id *hv_vmbus_get_id(const struct hv_driver *drv,
struct hv_device *dev)
{
const guid_t *guid = &dev->dev_type;
@@ -767,7 +716,7 @@ static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
return NULL;
/* Look at the dynamic ids first, before the static ones */
- id = hv_vmbus_dynid_match(drv, guid);
+ id = hv_vmbus_dynid_match((struct hv_driver *)drv, guid);
if (!id)
id = hv_vmbus_dev_match(drv->id_table, guid);
@@ -778,7 +727,30 @@ static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
return id;
}
-/* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */
+/* vmbus_add_dynid - add a new device ID to this driver and re-probe devices
+ *
+ * This function can race with vmbus_device_register(). This function is
+ * typically running on a user thread in response to writing to the "new_id"
+ * sysfs entry for a driver. vmbus_device_register() is running on a
+ * workqueue thread in response to the Hyper-V host offering a device to the
+ * guest. This function calls driver_attach(), which looks for an existing
+ * device matching the new id, and attaches the driver to which the new id
+ * has been assigned. vmbus_device_register() calls device_register(), which
+ * looks for a driver that matches the device being registered. If both
+ * operations are running simultaneously, the device driver probe function runs
+ * on whichever thread establishes the linkage between the driver and device.
+ *
+ * In most cases, it doesn't matter which thread runs the driver probe
+ * function. But if vmbus_device_register() does not find a matching driver,
+ * it proceeds to create the "channels" subdirectory and numbered per-channel
+ * subdirectory in sysfs. While that multi-step creation is in progress, this
+ * function could run the driver probe function. If the probe function checks
+ * for, or operates on, entries in the "channels" subdirectory, including by
+ * calling hv_create_ring_sysfs(), the operation may or may not succeed
+ * depending on the race. The race can't create a kernel failure in VMBus
+ * or device subsystem code, but probe functions in VMBus drivers doing such
+ * operations must be prepared for the failure case.
+ */
static int vmbus_add_dynid(struct hv_driver *drv, guid_t *guid)
{
struct vmbus_dynid *dynid;
@@ -880,9 +852,9 @@ ATTRIBUTE_GROUPS(vmbus_drv);
/*
* vmbus_match - Attempt to match the specified device to the specified driver
*/
-static int vmbus_match(struct device *device, struct device_driver *driver)
+static int vmbus_match(struct device *device, const struct device_driver *driver)
{
- struct hv_driver *drv = drv_to_hv_drv(driver);
+ const struct hv_driver *drv = drv_to_hv_drv(driver);
struct hv_device *hv_dev = device_to_hv_device(device);
/* The hv_sock driver handles all hv_sock offers. */
@@ -932,7 +904,7 @@ static int vmbus_dma_configure(struct device *child_device)
* On x86/x64 coherence is assumed and these calls have no effect.
*/
hv_setup_dma_ops(child_device,
- device_get_dma_attr(&hv_acpi_dev->dev) == DEV_DMA_COHERENT);
+ device_get_dma_attr(vmbus_root_device) == DEV_DMA_COHERENT);
return 0;
}
@@ -1049,7 +1021,7 @@ static const struct dev_pm_ops vmbus_pm = {
};
/* The one and only one */
-static struct bus_type hv_bus = {
+static const struct bus_type hv_bus = {
.name = "vmbus",
.match = vmbus_match,
.shutdown = vmbus_shutdown,
@@ -1086,12 +1058,9 @@ static void vmbus_onmessage_work(struct work_struct *work)
kfree(ctx);
}
-void vmbus_on_msg_dpc(unsigned long data)
+static void __vmbus_on_msg_dpc(void *message_page_addr)
{
- struct hv_per_cpu_context *hv_cpu = (void *)data;
- void *page_addr = hv_cpu->synic_message_page;
- struct hv_message msg_copy, *msg = (struct hv_message *)page_addr +
- VMBUS_MESSAGE_SINT;
+ struct hv_message msg_copy, *msg;
struct vmbus_channel_message_header *hdr;
enum vmbus_channel_message_type msgtype;
const struct vmbus_channel_message_table_entry *entry;
@@ -1099,6 +1068,10 @@ void vmbus_on_msg_dpc(unsigned long data)
__u8 payload_size;
u32 message_type;
+ if (!message_page_addr)
+ return;
+ msg = (struct hv_message *)message_page_addr + VMBUS_MESSAGE_SINT;
+
/*
* 'enum vmbus_channel_message_type' is supposed to always be 'u32' as
* it is being used in 'struct vmbus_channel_message_header' definition
@@ -1224,6 +1197,14 @@ msg_handled:
vmbus_signal_eom(msg, message_type);
}
+/*
+ * vmbus_on_msg_dpc - process pending messages for one per-CPU context.
+ * @data: pointer to a struct hv_per_cpu_context, passed as unsigned long
+ *        (presumably the msg_dpc tasklet argument - confirm at the
+ *        tasklet_init() site).
+ *
+ * Hands the hyp_ and then the para_ SynIC message page of that context to
+ * __vmbus_on_msg_dpc(), which returns immediately for a NULL page address.
+ */
+void vmbus_on_msg_dpc(unsigned long data)
+{
+ struct hv_per_cpu_context *hv_cpu = (void *)data;
+
+ __vmbus_on_msg_dpc(hv_cpu->hyp_synic_message_page);
+ __vmbus_on_msg_dpc(hv_cpu->para_synic_message_page);
+}
+
#ifdef CONFIG_PM_SLEEP
/*
* Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
@@ -1262,21 +1243,19 @@ static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
#endif /* CONFIG_PM_SLEEP */
/*
- * Schedule all channels with events pending
+ * Schedule all channels with events pending.
+ * The event page can be directly checked to get the id of
+ * the channel that has the interrupt pending.
*/
-static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
+static void vmbus_chan_sched(void *event_page_addr)
{
unsigned long *recv_int_page;
u32 maxbits, relid;
+ union hv_synic_event_flags *event;
- /*
- * The event page can be directly checked to get the id of
- * the channel that has the interrupt pending.
- */
- void *page_addr = hv_cpu->synic_event_page;
- union hv_synic_event_flags *event
- = (union hv_synic_event_flags *)page_addr +
- VMBUS_MESSAGE_SINT;
+ if (!event_page_addr)
+ return;
+ event = (union hv_synic_event_flags *)event_page_addr + VMBUS_MESSAGE_SINT;
maxbits = HV_EVENT_FLAGS_COUNT;
recv_int_page = event->flags;
@@ -1284,6 +1263,11 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
if (unlikely(!recv_int_page))
return;
+ /*
+ * Suggested-by: Michael Kelley <mhklinux@outlook.com>
+ * One possible optimization would be to keep track of the largest relID that's in use,
+ * and only scan up to that relID.
+ */
for_each_set_bit(relid, recv_int_page, maxbits) {
void (*callback_fn)(void *context);
struct vmbus_channel *channel;
@@ -1347,29 +1331,39 @@ sched_unlock_rcu:
}
}
-static void vmbus_isr(void)
+/*
+ * Inspect the VMBUS_MESSAGE_SINT slot of one SynIC message page.
+ * @hv_cpu: per-CPU context whose msg_dpc tasklet is scheduled if needed.
+ * @message_page_addr: base of the message page; NULL is silently ignored.
+ *
+ * A pending HVMSG_TIMER_EXPIRED message is handled inline through
+ * hv_stimer0_isr() and acknowledged with vmbus_signal_eom(). Any other
+ * pending message is deferred to the msg_dpc tasklet.
+ */
+static void vmbus_message_sched(struct hv_per_cpu_context *hv_cpu, void *message_page_addr)
{
- struct hv_per_cpu_context *hv_cpu
- = this_cpu_ptr(hv_context.cpu_context);
- void *page_addr;
struct hv_message *msg;
- vmbus_chan_sched(hv_cpu);
-
- page_addr = hv_cpu->synic_message_page;
- msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
+ if (!message_page_addr)
+ return;
+ msg = (struct hv_message *)message_page_addr + VMBUS_MESSAGE_SINT;
/* Check if there are actual msgs to be processed */
if (msg->header.message_type != HVMSG_NONE) {
if (msg->header.message_type == HVMSG_TIMER_EXPIRED) {
hv_stimer0_isr();
vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
- } else
+ } else {
tasklet_schedule(&hv_cpu->msg_dpc);
+ }
}
+}
+
+/*
+ * vmbus_isr - VMBus interrupt handler body for the current CPU.
+ *
+ * For the current CPU's context, first schedules channels with pending
+ * events on the hyp_ and para_ SynIC event pages, then checks the hyp_ and
+ * para_ SynIC message pages for pending messages, and finally calls
+ * add_interrupt_randomness() for vmbus_interrupt. The callees skip pages
+ * whose address is NULL.
+ *
+ * Exported only to the "mshv_vtl" module.
+ */
+void vmbus_isr(void)
+{
+ struct hv_per_cpu_context *hv_cpu
+ = this_cpu_ptr(hv_context.cpu_context);
+
+ vmbus_chan_sched(hv_cpu->hyp_synic_event_page);
+ vmbus_chan_sched(hv_cpu->para_synic_event_page);
+
+ vmbus_message_sched(hv_cpu, hv_cpu->hyp_synic_message_page);
+ vmbus_message_sched(hv_cpu, hv_cpu->para_synic_message_page);
add_interrupt_randomness(vmbus_interrupt);
}
+EXPORT_SYMBOL_FOR_MODULES(vmbus_isr, "mshv_vtl");
static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
{
@@ -1377,97 +1371,65 @@ static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
return IRQ_HANDLED;
}
-/*
- * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg
- * buffer and call into Hyper-V to transfer the data.
- */
-static void hv_kmsg_dump(struct kmsg_dumper *dumper,
- enum kmsg_dump_reason reason)
+/*
+ * Work item body used by vmbus_alloc_synic_and_connect(), which queues one
+ * instance on each online CPU with schedule_work_on(). Runs hv_synic_init()
+ * for the CPU the work item is executing on; @work itself is not used.
+ */
+static void vmbus_percpu_work(struct work_struct *work)
{
- struct kmsg_dump_iter iter;
- size_t bytes_written;
+ unsigned int cpu = smp_processor_id();
- /* We are only interested in panics. */
- if ((reason != KMSG_DUMP_PANIC) || (!sysctl_record_panic_msg))
- return;
-
- /*
- * Write dump contents to the page. No need to synchronize; panic should
- * be single-threaded.
- */
- kmsg_dump_rewind(&iter);
- kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
- &bytes_written);
- if (!bytes_written)
- return;
- /*
- * P3 to contain the physical address of the panic page & P4 to
- * contain the size of the panic data in that page. Rest of the
- * registers are no-op when the NOTIFY_MSG flag is set.
- */
- hv_set_register(HV_REGISTER_CRASH_P0, 0);
- hv_set_register(HV_REGISTER_CRASH_P1, 0);
- hv_set_register(HV_REGISTER_CRASH_P2, 0);
- hv_set_register(HV_REGISTER_CRASH_P3, virt_to_phys(hv_panic_page));
- hv_set_register(HV_REGISTER_CRASH_P4, bytes_written);
-
- /*
- * Let Hyper-V know there is crash data available along with
- * the panic message.
- */
- hv_set_register(HV_REGISTER_CRASH_CTL,
- (HV_CRASH_CTL_CRASH_NOTIFY | HV_CRASH_CTL_CRASH_NOTIFY_MSG));
+ hv_synic_init(cpu);
}
-static struct kmsg_dumper hv_kmsg_dumper = {
- .dump = hv_kmsg_dump,
-};
-
-static void hv_kmsg_dump_register(void)
+/*
+ * vmbus_alloc_synic_and_connect - set up the per-CPU SynIC state and connect
+ * to the host.
+ *
+ * Allocates the SynIC state with hv_synic_alloc(), runs hv_synic_init() on
+ * every online CPU through per-CPU work items, registers the CPU hotplug
+ * callbacks and finally calls vmbus_connect().
+ *
+ * The hotplug state returned by the registration is stored in the file-scope
+ * hyperv_cpuhp_online (not in a local of the same name), so it remains
+ * available to code that runs after this function returns.
+ *
+ * Return: 0 on success, otherwise the error code of the step that failed.
+ * On failure the hotplug state (if it was registered) is removed and the
+ * SynIC state is freed.
+ */
+static int vmbus_alloc_synic_and_connect(void)
{
- int ret;
+ int ret, cpu;
+ struct work_struct __percpu *works;
- hv_panic_page = hv_alloc_hyperv_zeroed_page();
- if (!hv_panic_page) {
- pr_err("Hyper-V: panic message page memory allocation failed\n");
- return;
+ ret = hv_synic_alloc();
+ if (ret < 0)
+ goto err_alloc;
+
+ works = alloc_percpu(struct work_struct);
+ if (!works) {
+ ret = -ENOMEM;
+ goto err_alloc;
}
- ret = kmsg_dump_register(&hv_kmsg_dumper);
- if (ret) {
- pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret);
- hv_free_hyperv_page((unsigned long)hv_panic_page);
- hv_panic_page = NULL;
+ /*
+ * Initialize the per-cpu interrupt state and stimer state.
+ * Then connect to the host.
+ */
+ cpus_read_lock();
+ for_each_online_cpu(cpu) {
+ struct work_struct *work = per_cpu_ptr(works, cpu);
+
+ INIT_WORK(work, vmbus_percpu_work);
+ schedule_work_on(cpu, work);
}
-}
-static struct ctl_table_header *hv_ctl_table_hdr;
+ for_each_online_cpu(cpu)
+ flush_work(per_cpu_ptr(works, cpu));
-/*
- * sysctl option to allow the user to control whether kmsg data should be
- * reported to Hyper-V on panic.
- */
-static struct ctl_table hv_ctl_table[] = {
- {
- .procname = "hyperv_record_panic_msg",
- .data = &sysctl_record_panic_msg,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE
- },
- {}
-};
+ /*
+ * Register the callbacks for possible CPU online/offline'ing. The
+ * online CPUs were already initialized by the work items above, hence
+ * the _nocalls variant.
+ */
+ ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
+ hv_synic_init, hv_synic_cleanup);
+ cpus_read_unlock();
+ free_percpu(works);
+ if (ret < 0)
+ goto err_alloc;
+ /* Record the dynamic state in the file-scope variable. */
+ hyperv_cpuhp_online = ret;
-static struct ctl_table hv_root_table[] = {
- {
- .procname = "kernel",
- .mode = 0555,
- .child = hv_ctl_table
- },
- {}
-};
+ ret = vmbus_connect();
+ if (ret)
+ goto err_connect;
+ return 0;
+
+err_connect:
+ cpuhp_remove_state(hyperv_cpuhp_online);
+ /* Fall through: the SynIC state must be freed on this path as well. */
+err_alloc:
+ hv_synic_free();
+ return ret;
+}
/*
* vmbus_bus_init -Main vmbus driver initialization routine.
@@ -1514,56 +1476,18 @@ static int vmbus_bus_init(void)
}
}
- ret = hv_synic_alloc();
- if (ret)
- goto err_alloc;
-
/*
- * Initialize the per-cpu interrupt state and stimer state.
- * Then connect to the host.
+ * Cache the value as getting it involves a VM exit on x86(_64), and
+ * doing that on each VP while initializing SynIC's wastes time.
*/
- ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
- hv_synic_init, hv_synic_cleanup);
- if (ret < 0)
- goto err_cpuhp;
- hyperv_cpuhp_online = ret;
-
- ret = vmbus_connect();
+ is_confidential = ms_hyperv.confidential_vmbus_available;
+ if (is_confidential)
+ pr_info("Establishing connection to the confidential VMBus\n");
+ hv_para_set_sint_proxy(!is_confidential);
+ ret = vmbus_alloc_synic_and_connect();
if (ret)
goto err_connect;
- if (hv_is_isolation_supported())
- sysctl_record_panic_msg = 0;
-
- /*
- * Only register if the crash MSRs are available
- */
- if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
- u64 hyperv_crash_ctl;
- /*
- * Panic message recording (sysctl_record_panic_msg)
- * is enabled by default in non-isolated guests and
- * disabled by default in isolated guests; the panic
- * message recording won't be available in isolated
- * guests should the following registration fail.
- */
- hv_ctl_table_hdr = register_sysctl_table(hv_root_table);
- if (!hv_ctl_table_hdr)
- pr_err("Hyper-V: sysctl table register error");
-
- /*
- * Register for panic kmsg callback only if the right
- * capability is supported by the hypervisor.
- */
- hyperv_crash_ctl = hv_get_register(HV_REGISTER_CRASH_CTL);
- if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG)
- hv_kmsg_dump_register();
-
- register_die_notifier(&hyperv_die_report_block);
- atomic_notifier_chain_register(&panic_notifier_list,
- &hyperv_panic_report_block);
- }
-
/*
* Always register the vmbus unload panic notifier because we
* need to shut the VMbus channel connection on panic.
@@ -1576,10 +1500,6 @@ static int vmbus_bus_init(void)
return 0;
err_connect:
- cpuhp_remove_state(hyperv_cpuhp_online);
-err_cpuhp:
- hv_synic_free();
-err_alloc:
if (vmbus_irq == -1) {
hv_remove_vmbus_handler();
} else {
@@ -1588,8 +1508,6 @@ err_alloc:
}
err_setup:
bus_unregister(&hv_bus);
- unregister_sysctl_table(hv_ctl_table_hdr);
- hv_ctl_table_hdr = NULL;
return ret;
}
@@ -1781,16 +1699,16 @@ static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf)
{
return sprintf(buf, "%u\n", channel->target_cpu);
}
-static ssize_t target_cpu_store(struct vmbus_channel *channel,
- const char *buf, size_t count)
+
+int vmbus_channel_set_cpu(struct vmbus_channel *channel, u32 target_cpu)
{
- u32 target_cpu, origin_cpu;
- ssize_t ret = count;
+ u32 origin_cpu;
+ int ret = 0;
- if (vmbus_proto_version < VERSION_WIN10_V4_1)
- return -EIO;
+ lockdep_assert_cpus_held();
+ lockdep_assert_held(&vmbus_connection.channel_mutex);
- if (sscanf(buf, "%uu", &target_cpu) != 1)
+ if (vmbus_proto_version < VERSION_WIN10_V4_1)
return -EIO;
/* Validate target_cpu for the cpumask_test_cpu() operation below. */
@@ -1800,22 +1718,17 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
if (!cpumask_test_cpu(target_cpu, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ)))
return -EINVAL;
- /* No CPUs should come up or down during this. */
- cpus_read_lock();
-
- if (!cpu_online(target_cpu)) {
- cpus_read_unlock();
+ if (!cpu_online(target_cpu))
return -EINVAL;
- }
/*
- * Synchronizes target_cpu_store() and channel closure:
+ * Synchronizes vmbus_channel_set_cpu() and channel closure:
*
* { Initially: state = CHANNEL_OPENED }
*
* CPU1 CPU2
*
- * [target_cpu_store()] [vmbus_disconnect_ring()]
+ * [vmbus_channel_set_cpu()] [vmbus_disconnect_ring()]
*
* LOCK channel_mutex LOCK channel_mutex
* LOAD r1 = state LOAD r2 = state
@@ -1830,7 +1743,6 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
* Note. The host processes the channel messages "sequentially", in
* the order in which they are received on a per-partition basis.
*/
- mutex_lock(&vmbus_connection.channel_mutex);
/*
* Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels;
@@ -1838,17 +1750,17 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
*/
if (channel->state != CHANNEL_OPENED_STATE) {
ret = -EIO;
- goto cpu_store_unlock;
+ goto end;
}
origin_cpu = channel->target_cpu;
if (target_cpu == origin_cpu)
- goto cpu_store_unlock;
+ goto end;
if (vmbus_send_modifychannel(channel,
hv_cpu_number_to_vp_number(target_cpu))) {
ret = -EIO;
- goto cpu_store_unlock;
+ goto end;
}
/*
@@ -1878,10 +1790,26 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
origin_cpu, target_cpu);
}
-cpu_store_unlock:
+end:
+ return ret;
+}
+
+/*
+ * sysfs store handler for the per-channel "cpu" attribute.
+ *
+ * Parses an unsigned decimal CPU number from @buf and passes it to
+ * vmbus_channel_set_cpu() while holding the CPU hotplug read lock and
+ * vmbus_connection.channel_mutex, both of which that function asserts.
+ *
+ * Returns @count on success, -EIO if @buf cannot be parsed, or the error
+ * returned by vmbus_channel_set_cpu().
+ */
+static ssize_t target_cpu_store(struct vmbus_channel *channel,
+ const char *buf, size_t count)
+{
+ u32 target_cpu;
+ ssize_t ret;
+
+ if (sscanf(buf, "%u", &target_cpu) != 1)
+ return -EIO;
+
+ cpus_read_lock();
+ mutex_lock(&vmbus_connection.channel_mutex);
+ ret = vmbus_channel_set_cpu(channel, target_cpu);
mutex_unlock(&vmbus_connection.channel_mutex);
cpus_read_unlock();
- return ret;
+
+ /* A zero result from vmbus_channel_set_cpu() means the write was consumed. */
+ return ret ?: count;
}
static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store);
@@ -1962,6 +1890,26 @@ static ssize_t subchannel_id_show(struct vmbus_channel *channel,
}
static VMBUS_CHAN_ATTR_RO(subchannel_id);
+/*
+ * mmap handler of the channel "ring" binary attribute.
+ *
+ * Recovers the vmbus_channel that embeds @kobj and forwards the request to
+ * the channel's mmap_ring_buffer callback. @filp and @attr are unused.
+ * Returns whatever that callback returns.
+ */
+static int hv_mmap_ring_buffer_wrapper(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *attr,
+ struct vm_area_struct *vma)
+{
+ struct vmbus_channel *channel = container_of(kobj, struct vmbus_channel, kobj);
+
+ /*
+ * hv_(create|remove)_ring_sysfs implementation ensures that mmap_ring_buffer
+ * is not NULL.
+ */
+ return channel->mmap_ring_buffer(channel, vma);
+}
+
+/*
+ * Binary sysfs attribute named "ring" (mode 0600). Only mmap is provided;
+ * it is routed through hv_mmap_ring_buffer_wrapper().
+ */
+static struct bin_attribute chan_attr_ring_buffer = {
+ .attr = {
+ .name = "ring",
+ .mode = 0600,
+ },
+ .mmap = hv_mmap_ring_buffer_wrapper,
+};
static struct attribute *vmbus_chan_attrs[] = {
&chan_attr_out_mask.attr,
&chan_attr_in_mask.attr,
@@ -1981,6 +1929,11 @@ static struct attribute *vmbus_chan_attrs[] = {
NULL
};
+/* NULL-terminated list of the binary attributes in vmbus_chan_group. */
+static const struct bin_attribute *vmbus_chan_bin_attrs[] = {
+ &chan_attr_ring_buffer,
+ NULL
+};
+
/*
* Channel-level attribute_group callback function. Returns the permission for
* each attribute, and returns 0 if an attribute is not visible.
@@ -2001,16 +1954,99 @@ static umode_t vmbus_chan_attr_is_visible(struct kobject *kobj,
return attr->mode;
}
-static struct attribute_group vmbus_chan_group = {
+/*
+ * Visibility callback for the channel's binary attributes. Returns the mode
+ * of @attr, except that the "ring" attribute is reported as invisible (0)
+ * while the channel's ring_sysfs_visible flag is false.
+ */
+static umode_t vmbus_chan_bin_attr_is_visible(struct kobject *kobj,
+					      const struct bin_attribute *attr, int idx)
+{
+	const struct vmbus_channel *chan;
+
+	/* Only the ring attribute is gated on per-channel state. */
+	if (attr != &chan_attr_ring_buffer)
+		return attr->attr.mode;
+
+	chan = container_of(kobj, struct vmbus_channel, kobj);
+
+	return chan->ring_sysfs_visible ? attr->attr.mode : 0;
+}
+
+/*
+ * Size callback for the channel's binary attributes: the channel's
+ * ringbuffer_pagecount converted from pages to bytes.
+ */
+static size_t vmbus_chan_bin_size(struct kobject *kobj,
+				  const struct bin_attribute *bin_attr, int a)
+{
+	const struct vmbus_channel *chan =
+		container_of(kobj, struct vmbus_channel, kobj);
+	size_t ring_bytes = chan->ringbuffer_pagecount << PAGE_SHIFT;
+
+	return ring_bytes;
+}
+
+static const struct attribute_group vmbus_chan_group = {
.attrs = vmbus_chan_attrs,
- .is_visible = vmbus_chan_attr_is_visible
+ .bin_attrs = vmbus_chan_bin_attrs,
+ .is_visible = vmbus_chan_attr_is_visible,
+ .is_bin_visible = vmbus_chan_bin_attr_is_visible,
+ .bin_size = vmbus_chan_bin_size,
};
-static struct kobj_type vmbus_chan_ktype = {
+static const struct kobj_type vmbus_chan_ktype = {
.sysfs_ops = &vmbus_chan_sysfs_ops,
.release = vmbus_chan_release,
};
+/**
+ * hv_create_ring_sysfs() - make the "ring" sysfs node of a channel visible
+ * @channel: Pointer to vmbus_channel structure
+ * @hv_mmap_ring_buffer: callback invoked when the channel's "ring" sysfs node,
+ *                       which stands for the ring buffer of that channel, is
+ *                       mmap()ed. It receives the channel and the
+ *                       vm_area_struct used for the mapping:
+ *                       int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel,
+ *                                                  struct vm_area_struct *vma)
+ *
+ * The sysfs node for the ring buffer of a channel is created along with the
+ * other channel fields, but its visibility is disabled by default so that the
+ * ring buffer is exposed only while a use-case that needs it is running.
+ * For example, an HV_NIC device is used either by uio_hv_generic or by
+ * hv_netvsc at any given point of time, and the "ring" node is needed only
+ * while uio_hv_generic is bound to that device. This function stores the mmap
+ * callback in the channel, marks the node visible and updates the channel's
+ * sysfs group so that userspace can use it.
+ *
+ * Note: Race conditions can happen with userspace and it is not encouraged to
+ * create new use-cases for this. This was added to maintain backward
+ * compatibility, while solving one of the race conditions in uio_hv_generic
+ * while creating sysfs. See comments with vmbus_add_dynid() and
+ * vmbus_device_register().
+ *
+ * Returns 0 on success or error code on failure.
+ */
+int hv_create_ring_sysfs(struct vmbus_channel *channel,
+			 int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel,
+						    struct vm_area_struct *vma))
+{
+	/* Install the callback before the attribute can become visible. */
+	channel->mmap_ring_buffer = hv_mmap_ring_buffer;
+	channel->ring_sysfs_visible = true;
+
+	return sysfs_update_group(&channel->kobj, &vmbus_chan_group);
+}
+EXPORT_SYMBOL_GPL(hv_create_ring_sysfs);
+
+/**
+ * hv_remove_ring_sysfs() - hide the "ring" sysfs node of a channel
+ * @channel: Pointer to vmbus_channel structure
+ *
+ * Clears the channel's ring_sysfs_visible flag, updates the channel's sysfs
+ * group so that the "ring" node is hidden, and then drops the mmap callback
+ * stored in the channel.
+ *
+ * Returns 0 on success or error code on failure.
+ */
+int hv_remove_ring_sysfs(struct vmbus_channel *channel)
+{
+	int err;
+
+	channel->ring_sysfs_visible = false;
+	err = sysfs_update_group(&channel->kobj, &vmbus_chan_group);
+
+	/* The callback is cleared whether or not the group update succeeded. */
+	channel->mmap_ring_buffer = NULL;
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(hv_remove_ring_sysfs);
+
/*
* vmbus_add_channel_kobj - setup a sub-directory under device/channels
*/
@@ -2090,7 +2126,7 @@ int vmbus_device_register(struct hv_device *child_device_obj)
&child_device_obj->channel->offermsg.offer.if_instance);
child_device_obj->device.bus = &hv_bus;
- child_device_obj->device.parent = &hv_acpi_dev->dev;
+ child_device_obj->device.parent = vmbus_root_device;
child_device_obj->device.release = vmbus_device_release;
child_device_obj->device.dma_parms = &child_device_obj->dma_parms;
@@ -2108,6 +2144,20 @@ int vmbus_device_register(struct hv_device *child_device_obj)
return ret;
}
+ /*
+ * If device_register() found a driver to assign to the device, the
+ * driver's probe function has already run at this point. If that
+ * probe function accesses or operates on the "channels" subdirectory
+ * in sysfs, those operations will have failed because the "channels"
+ * subdirectory doesn't exist until the code below runs. Or if the
+ * probe function creates a /dev entry, a user space program could
+ * find and open the /dev entry, and then create a race by accessing
+ * the "channels" subdirectory while the creation steps are in progress
+ * here. The race can't result in a kernel failure, but the user space
+ * program may get an error in accessing "channels" or its
+ * subdirectories. See also comments with vmbus_add_dynid() about a
+ * related race condition.
+ */
child_device_obj->channels_kset = kset_create_and_add("channels",
NULL, kobj);
if (!child_device_obj->channels_kset) {
@@ -2118,7 +2168,7 @@ int vmbus_device_register(struct hv_device *child_device_obj)
ret = vmbus_add_channel_kobj(child_device_obj,
child_device_obj->channel);
if (ret) {
- pr_err("Unable to register primary channeln");
+ pr_err("Unable to register primary channel\n");
goto err_kset_unregister;
}
hv_debug_add_dev_dir(child_device_obj);
@@ -2150,13 +2200,13 @@ void vmbus_device_unregister(struct hv_device *device_obj)
*/
device_unregister(&device_obj->device);
}
+EXPORT_SYMBOL_GPL(vmbus_device_unregister);
-
+#ifdef CONFIG_ACPI
/*
* VMBUS is an acpi enumerated device. Get the information we
* need from DSDT.
*/
-#define VTPM_BASE_ADDRESS 0xfed40000
static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx)
{
resource_size_t start = 0;
@@ -2261,8 +2311,9 @@ static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx)
return AE_OK;
}
+#endif
-static void vmbus_acpi_remove(struct acpi_device *device)
+static void vmbus_mmio_remove(void)
{
struct resource *cur_res;
struct resource *next_res;
@@ -2281,15 +2332,17 @@ static void vmbus_acpi_remove(struct acpi_device *device)
}
}
-static void vmbus_reserve_fb(void)
+static void __maybe_unused vmbus_reserve_fb(void)
{
resource_size_t start = 0, size;
struct pci_dev *pdev;
if (efi_enabled(EFI_BOOT)) {
/* Gen2 VM: get FB base from EFI framebuffer */
- start = screen_info.lfb_base;
- size = max_t(__u32, screen_info.lfb_size, 0x800000);
+ if (IS_ENABLED(CONFIG_SYSFB)) {
+ start = screen_info.lfb_base;
+ size = max_t(__u32, screen_info.lfb_size, 0x800000);
+ }
} else {
/* Gen1 VM: get FB base from PCI */
pdev = pci_get_device(PCI_VENDOR_ID_MICROSOFT,
@@ -2429,25 +2482,40 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size)
struct resource *iter;
mutex_lock(&hyperv_mmio_lock);
+
+ /*
+ * If all bytes of the MMIO range to be released are within the
+ * special case fb_mmio shadow region, skip releasing the shadow
+ * region since no corresponding __request_region() was done
+ * in vmbus_allocate_mmio().
+ */
+ if (fb_mmio && start >= fb_mmio->start &&
+ (start + size - 1 <= fb_mmio->end))
+ goto skip_shadow_release;
+
for (iter = hyperv_mmio; iter; iter = iter->sibling) {
if ((iter->start >= start + size) || (iter->end <= start))
continue;
__release_region(iter, start, size);
}
+
+skip_shadow_release:
release_mem_region(start, size);
mutex_unlock(&hyperv_mmio_lock);
}
EXPORT_SYMBOL_GPL(vmbus_free_mmio);
-static int vmbus_acpi_add(struct acpi_device *device)
+#ifdef CONFIG_ACPI
+static int vmbus_acpi_add(struct platform_device *pdev)
{
acpi_status result;
int ret_val = -ENODEV;
struct acpi_device *ancestor;
+ struct acpi_device *device = ACPI_COMPANION(&pdev->dev);
- hv_acpi_dev = device;
+ vmbus_root_device = &device->dev;
/*
* Older versions of Hyper-V for ARM64 fail to include the _CCA
@@ -2473,7 +2541,8 @@ static int vmbus_acpi_add(struct acpi_device *device)
* Some ancestor of the vmbus acpi device (Gen1 or Gen2
* firmware) is the VMOD that has the mmio ranges. Get that.
*/
- for (ancestor = acpi_dev_parent(device); ancestor;
+ for (ancestor = acpi_dev_parent(device);
+ ancestor && ancestor->handle != ACPI_ROOT_OBJECT;
ancestor = acpi_dev_parent(ancestor)) {
result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS,
vmbus_walk_resources, NULL);
@@ -2489,9 +2558,94 @@ static int vmbus_acpi_add(struct acpi_device *device)
acpi_walk_err:
if (ret_val)
- vmbus_acpi_remove(device);
+ vmbus_mmio_remove();
return ret_val;
}
+#else
+/* Stub for builds without CONFIG_ACPI: does nothing and reports success. */
+static int vmbus_acpi_add(struct platform_device *pdev)
+{
+ return 0;
+}
+#endif
+#ifndef HYPERVISOR_CALLBACK_VECTOR
+/*
+ * Look up interrupt 0 of @pdev and publish it through the globals vmbus_irq
+ * (Linux virq) and vmbus_interrupt (hardware interrupt number).
+ *
+ * Returns 0 on success, the negative value returned by platform_get_irq(),
+ * or -ENODEV when no irq_data exists for the virq.
+ */
+static int vmbus_set_irq(struct platform_device *pdev)
+{
+	struct irq_data *irqd;
+	int virq;
+
+	virq = platform_get_irq(pdev, 0);
+	/* platform_get_irq() may not return 0. */
+	if (virq < 0)
+		return virq;
+
+	irqd = irq_get_irq_data(virq);
+	if (!irqd) {
+		pr_err("No interrupt data for VMBus virq %d\n", virq);
+		return -ENODEV;
+	}
+
+	vmbus_irq = virq;
+	vmbus_interrupt = irqd_to_hwirq(irqd);
+	pr_debug("VMBus virq %d, hwirq %d\n", vmbus_irq, vmbus_interrupt);
+
+	return 0;
+}
+#endif
+
+/**
+ * vmbus_device_add() - set up the VMBus root device from its devicetree node
+ * @pdev: platform device that was matched for VMBus
+ *
+ * Records @pdev as the VMBus root device, obtains the VMBus interrupt when
+ * HYPERVISOR_CALLBACK_VECTOR is not defined, and builds the hyperv_mmio
+ * sibling list with one "hyperv mmio" resource for every range produced by
+ * the OF range parser of the device node.
+ *
+ * Return: 0 on success, the error from of_range_parser_init() or
+ * vmbus_set_irq(), or -ENOMEM when a resource cannot be allocated (after
+ * calling vmbus_mmio_remove()).
+ */
+static int vmbus_device_add(struct platform_device *pdev)
+{
+	struct resource **cur_res = &hyperv_mmio;
+	struct of_range range;
+	struct of_range_parser parser;
+	struct device_node *np = pdev->dev.of_node;
+	int ret;
+
+	vmbus_root_device = &pdev->dev;
+
+	ret = of_range_parser_init(&parser, np);
+	if (ret)
+		return ret;
+
+#ifndef HYPERVISOR_CALLBACK_VECTOR
+	ret = vmbus_set_irq(pdev);
+	if (ret)
+		return ret;
+#endif
+	for_each_of_range(&parser, &range) {
+		struct resource *res;
+
+		res = kzalloc(sizeof(*res), GFP_KERNEL);
+		if (!res) {
+			vmbus_mmio_remove();
+			return -ENOMEM;
+		}
+
+		res->name = "hyperv mmio";
+		res->flags = range.flags;
+		res->start = range.cpu_addr;
+		/*
+		 * struct resource bounds are inclusive, so the last valid
+		 * address is start + size - 1. Using start + size would
+		 * record the window one byte too long.
+		 */
+		res->end = range.cpu_addr + range.size - 1;
+
+		/* Link the new entry at the tail of the sibling list. */
+		*cur_res = res;
+		cur_res = &res->sibling;
+	}
+
+	return 0;
+}
+
+/*
+ * Platform driver probe: use the devicetree-based setup when ACPI is
+ * disabled, and the ACPI-based setup otherwise.
+ */
+static int vmbus_platform_driver_probe(struct platform_device *pdev)
+{
+	return acpi_disabled ? vmbus_device_add(pdev) : vmbus_acpi_add(pdev);
+}
+
+/* Platform driver remove: tear down the MMIO state via vmbus_mmio_remove(). */
+static void vmbus_platform_driver_remove(struct platform_device *pdev)
+{
+ vmbus_mmio_remove();
+}
#ifdef CONFIG_PM_SLEEP
static int vmbus_bus_suspend(struct device *dev)
@@ -2537,11 +2691,6 @@ static int vmbus_bus_suspend(struct device *dev)
if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
wait_for_completion(&vmbus_connection.ready_for_suspend_event);
- if (atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0) {
- pr_err("Can not suspend due to a previous failed resuming\n");
- return -EBUSY;
- }
-
mutex_lock(&vmbus_connection.channel_mutex);
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
@@ -2566,22 +2715,18 @@ static int vmbus_bus_suspend(struct device *dev)
pr_err("Sub-channel not deleted!\n");
WARN_ON_ONCE(1);
}
-
- atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
}
mutex_unlock(&vmbus_connection.channel_mutex);
vmbus_initiate_unload(false);
- /* Reset the event for the next resume. */
- reinit_completion(&vmbus_connection.ready_for_resume_event);
-
return 0;
}
static int vmbus_bus_resume(struct device *dev)
{
+ struct vmbus_channel *channel;
struct vmbus_channel_msginfo *msginfo;
size_t msgsize;
int ret;
@@ -2612,13 +2757,23 @@ static int vmbus_bus_resume(struct device *dev)
if (ret != 0)
return ret;
- WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0);
-
vmbus_request_offers();
- if (wait_for_completion_timeout(
- &vmbus_connection.ready_for_resume_event, 10 * HZ) == 0)
- pr_err("Some vmbus device is missing after suspending?\n");
+ mutex_lock(&vmbus_connection.channel_mutex);
+ list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+ if (channel->offermsg.child_relid != INVALID_RELID)
+ continue;
+
+ /* hvsock channels are not expected to be present. */
+ if (is_hvsock_channel(channel))
+ continue;
+
+ pr_err("channel %pUl/%pUl not present after resume.\n",
+ &channel->offermsg.offer.if_type,
+ &channel->offermsg.offer.if_instance);
+ /* ToDo: Cleanup these channels here */
+ }
+ mutex_unlock(&vmbus_connection.channel_mutex);
/* Reset the event for the next suspend. */
reinit_completion(&vmbus_connection.ready_for_suspend_event);
@@ -2630,7 +2785,17 @@ static int vmbus_bus_resume(struct device *dev)
#define vmbus_bus_resume NULL
#endif /* CONFIG_PM_SLEEP */
-static const struct acpi_device_id vmbus_acpi_device_ids[] = {
+/*
+ * Devicetree match table: the driver binds to nodes whose compatible string
+ * is "microsoft,vmbus". Marked __maybe_unused because the only reference in
+ * this file goes through of_match_ptr().
+ */
+static const __maybe_unused struct of_device_id vmbus_of_match[] = {
+ {
+ .compatible = "microsoft,vmbus",
+ },
+ {
+ /* sentinel */
+ },
+};
+MODULE_DEVICE_TABLE(of, vmbus_of_match);
+
+static const __maybe_unused struct acpi_device_id vmbus_acpi_device_ids[] = {
{"VMBUS", 0},
{"VMBus", 0},
{"", 0},
@@ -2658,15 +2823,16 @@ static const struct dev_pm_ops vmbus_bus_pm = {
.restore_noirq = vmbus_bus_resume
};
-static struct acpi_driver vmbus_acpi_driver = {
- .name = "vmbus",
- .ids = vmbus_acpi_device_ids,
- .ops = {
- .add = vmbus_acpi_add,
- .remove = vmbus_acpi_remove,
- },
- .drv.pm = &vmbus_bus_pm,
- .drv.probe_type = PROBE_FORCE_SYNCHRONOUS,
+/*
+ * Platform driver for the VMBus root device. It can be matched through either
+ * the ACPI ID table or the devicetree match table, and uses vmbus_bus_pm for
+ * power management. Probing is forced synchronous.
+ * NOTE(review): hv_acpi_init() checks vmbus_root_device right after
+ * platform_driver_register(), which presumably relies on the synchronous
+ * probe - confirm.
+ */
+static struct platform_driver vmbus_platform_driver = {
+ .probe = vmbus_platform_driver_probe,
+ .remove = vmbus_platform_driver_remove,
+ .driver = {
+ .name = "vmbus",
+ .acpi_match_table = ACPI_PTR(vmbus_acpi_device_ids),
+ .of_match_table = of_match_ptr(vmbus_of_match),
+ .pm = &vmbus_bus_pm,
+ .probe_type = PROBE_FORCE_SYNCHRONOUS,
+ }
+};
static void hv_kexec_handler(void)
@@ -2690,10 +2856,10 @@ static void hv_crash_handler(struct pt_regs *regs)
*/
cpu = smp_processor_id();
hv_stimer_cleanup(cpu);
- hv_synic_disable_regs(cpu);
+ hv_hyp_synic_disable_regs(cpu);
};
-static int hv_synic_suspend(void)
+static int hv_synic_suspend(void *data)
{
/*
* When we reach here, all the non-boot CPUs have been offlined.
@@ -2715,14 +2881,14 @@ static int hv_synic_suspend(void)
* interrupts-disabled context.
*/
- hv_synic_disable_regs(0);
+ hv_hyp_synic_disable_regs(0);
return 0;
}
-static void hv_synic_resume(void)
+static void hv_synic_resume(void *data)
{
- hv_synic_enable_regs(0);
+ hv_hyp_synic_enable_regs(0);
/*
* Note: we don't need to call hv_stimer_init(0), because the timer
@@ -2732,11 +2898,15 @@ static void hv_synic_resume(void)
}
/* The callbacks run only on CPU0, with irqs_disabled. */
-static struct syscore_ops hv_synic_syscore_ops = {
+static const struct syscore_ops hv_synic_syscore_ops = {
.suspend = hv_synic_suspend,
.resume = hv_synic_resume,
};
+/*
+ * Syscore object wrapping hv_synic_syscore_ops; this is what gets passed to
+ * register_syscore() and unregister_syscore().
+ */
+static struct syscore hv_synic_syscore = {
+ .ops = &hv_synic_syscore_ops,
+};
+
static int __init hv_acpi_init(void)
{
int ret;
@@ -2744,18 +2914,17 @@ static int __init hv_acpi_init(void)
if (!hv_is_hyperv_initialized())
return -ENODEV;
- if (hv_root_partition)
+ if (hv_root_partition() && !hv_nested)
return 0;
/*
* Get the ACPI or devicetree (OF) resources first.
*/
- ret = acpi_bus_register_driver(&vmbus_acpi_driver);
-
+ ret = platform_driver_register(&vmbus_platform_driver);
if (ret)
return ret;
- if (!hv_acpi_dev) {
+ if (!vmbus_root_device) {
ret = -ENODEV;
goto cleanup;
}
@@ -2780,13 +2949,13 @@ static int __init hv_acpi_init(void)
hv_setup_kexec_handler(hv_kexec_handler);
hv_setup_crash_handler(hv_crash_handler);
- register_syscore_ops(&hv_synic_syscore_ops);
+ register_syscore(&hv_synic_syscore);
return 0;
cleanup:
- acpi_bus_unregister_driver(&vmbus_acpi_driver);
- hv_acpi_dev = NULL;
+ platform_driver_unregister(&vmbus_platform_driver);
+ vmbus_root_device = NULL;
return ret;
}
@@ -2794,7 +2963,7 @@ static void __exit vmbus_exit(void)
{
int cpu;
- unregister_syscore_ops(&hv_synic_syscore_ops);
+ unregister_syscore(&hv_synic_syscore);
hv_remove_kexec_handler();
hv_remove_crash_handler();
@@ -2818,13 +2987,6 @@ static void __exit vmbus_exit(void)
vmbus_free_channels();
kfree(vmbus_connection.channels);
- if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
- kmsg_dump_unregister(&hv_kmsg_dumper);
- unregister_die_notifier(&hyperv_die_report_block);
- atomic_notifier_chain_unregister(&panic_notifier_list,
- &hyperv_panic_report_block);
- }
-
/*
* The vmbus panic notifier is always registered, hence we should
* also unconditionally unregister it here as well.
@@ -2832,14 +2994,11 @@ static void __exit vmbus_exit(void)
atomic_notifier_chain_unregister(&panic_notifier_list,
&hyperv_panic_vmbus_unload_block);
- free_page((unsigned long)hv_panic_page);
- unregister_sysctl_table(hv_ctl_table_hdr);
- hv_ctl_table_hdr = NULL;
bus_unregister(&hv_bus);
cpuhp_remove_state(hyperv_cpuhp_online);
hv_synic_free();
- acpi_bus_unregister_driver(&vmbus_acpi_driver);
+ platform_driver_unregister(&vmbus_platform_driver);
}