summaryrefslogtreecommitdiff
path: root/drivers/hv/vmbus_drv.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/hv/vmbus_drv.c')
-rw-r--r--drivers/hv/vmbus_drv.c350
1 files changed, 293 insertions, 57 deletions
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 12a707ab73f8..2ed5a1e89d69 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -45,7 +45,8 @@ struct vmbus_dynid {
struct hv_vmbus_device_id id;
};
-static struct device *hv_dev;
+/* VMBus Root Device */
+static struct device *vmbus_root_device;
static int hyperv_cpuhp_online;
@@ -80,9 +81,15 @@ static struct resource *fb_mmio;
static struct resource *hyperv_mmio;
static DEFINE_MUTEX(hyperv_mmio_lock);
+struct device *hv_get_vmbus_root_device(void)
+{
+ return vmbus_root_device;
+}
+EXPORT_SYMBOL_GPL(hv_get_vmbus_root_device);
+
static int vmbus_exists(void)
{
- if (hv_dev == NULL)
+ if (vmbus_root_device == NULL)
return -ENODEV;
return 0;
@@ -685,7 +692,7 @@ static const struct hv_vmbus_device_id vmbus_device_null;
* Return a matching hv_vmbus_device_id pointer.
* If there is no match, return NULL.
*/
-static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
+static const struct hv_vmbus_device_id *hv_vmbus_get_id(const struct hv_driver *drv,
struct hv_device *dev)
{
const guid_t *guid = &dev->dev_type;
@@ -696,7 +703,7 @@ static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
return NULL;
/* Look at the dynamic ids first, before the static ones */
- id = hv_vmbus_dynid_match(drv, guid);
+ id = hv_vmbus_dynid_match((struct hv_driver *)drv, guid);
if (!id)
id = hv_vmbus_dev_match(drv->id_table, guid);
@@ -707,7 +714,30 @@ static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
return id;
}
-/* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */
+/* vmbus_add_dynid - add a new device ID to this driver and re-probe devices
+ *
+ * This function can race with vmbus_device_register(). This function is
+ * typically running on a user thread in response to writing to the "new_id"
+ * sysfs entry for a driver. vmbus_device_register() is running on a
+ * workqueue thread in response to the Hyper-V host offering a device to the
+ * guest. This function calls driver_attach(), which looks for an existing
+ * device matching the new id, and attaches the driver to which the new id
+ * has been assigned. vmbus_device_register() calls device_register(), which
+ * looks for a driver that matches the device being registered. If both
+ * operations are running simultaneously, the device driver probe function runs
+ * on whichever thread establishes the linkage between the driver and device.
+ *
+ * In most cases, it doesn't matter which thread runs the driver probe
+ * function. But if vmbus_device_register() does not find a matching driver,
+ * it proceeds to create the "channels" subdirectory and numbered per-channel
+ * subdirectory in sysfs. While that multi-step creation is in progress, this
+ * function could run the driver probe function. If the probe function checks
+ * for, or operates on, entries in the "channels" subdirectory, including by
+ * calling hv_create_ring_sysfs(), the operation may or may not succeed
+ * depending on the race. The race can't create a kernel failure in VMBus
+ * or device subsystem code, but probe functions in VMBus drivers doing such
+ * operations must be prepared for the failure case.
+ */
static int vmbus_add_dynid(struct hv_driver *drv, guid_t *guid)
{
struct vmbus_dynid *dynid;
@@ -809,9 +839,9 @@ ATTRIBUTE_GROUPS(vmbus_drv);
/*
* vmbus_match - Attempt to match the specified device to the specified driver
*/
-static int vmbus_match(struct device *device, struct device_driver *driver)
+static int vmbus_match(struct device *device, const struct device_driver *driver)
{
- struct hv_driver *drv = drv_to_hv_drv(driver);
+ const struct hv_driver *drv = drv_to_hv_drv(driver);
struct hv_device *hv_dev = device_to_hv_device(device);
/* The hv_sock driver handles all hv_sock offers. */
@@ -861,7 +891,7 @@ static int vmbus_dma_configure(struct device *child_device)
* On x86/x64 coherence is assumed and these calls have no effect.
*/
hv_setup_dma_ops(child_device,
- device_get_dma_attr(hv_dev) == DEV_DMA_COHERENT);
+ device_get_dma_attr(vmbus_root_device) == DEV_DMA_COHERENT);
return 0;
}
@@ -1306,6 +1336,13 @@ static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
return IRQ_HANDLED;
}
+static void vmbus_percpu_work(struct work_struct *work)
+{
+ unsigned int cpu = smp_processor_id();
+
+ hv_synic_init(cpu);
+}
+
/*
* vmbus_bus_init -Main vmbus driver initialization routine.
*
@@ -1316,7 +1353,8 @@ static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
*/
static int vmbus_bus_init(void)
{
- int ret;
+ int ret, cpu;
+ struct work_struct __percpu *works;
ret = hv_init();
if (ret != 0) {
@@ -1355,12 +1393,32 @@ static int vmbus_bus_init(void)
if (ret)
goto err_alloc;
+ works = alloc_percpu(struct work_struct);
+ if (!works) {
+ ret = -ENOMEM;
+ goto err_alloc;
+ }
+
/*
* Initialize the per-cpu interrupt state and stimer state.
* Then connect to the host.
*/
- ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
- hv_synic_init, hv_synic_cleanup);
+ cpus_read_lock();
+ for_each_online_cpu(cpu) {
+ struct work_struct *work = per_cpu_ptr(works, cpu);
+
+ INIT_WORK(work, vmbus_percpu_work);
+ schedule_work_on(cpu, work);
+ }
+
+ for_each_online_cpu(cpu)
+ flush_work(per_cpu_ptr(works, cpu));
+
+ /* Register the callbacks for possible CPU online/offline'ing */
+ ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
+ hv_synic_init, hv_synic_cleanup);
+ cpus_read_unlock();
+ free_percpu(works);
if (ret < 0)
goto err_alloc;
hyperv_cpuhp_online = ret;
@@ -1583,16 +1641,16 @@ static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf)
{
return sprintf(buf, "%u\n", channel->target_cpu);
}
-static ssize_t target_cpu_store(struct vmbus_channel *channel,
- const char *buf, size_t count)
+
+int vmbus_channel_set_cpu(struct vmbus_channel *channel, u32 target_cpu)
{
- u32 target_cpu, origin_cpu;
- ssize_t ret = count;
+ u32 origin_cpu;
+ int ret = 0;
- if (vmbus_proto_version < VERSION_WIN10_V4_1)
- return -EIO;
+ lockdep_assert_cpus_held();
+ lockdep_assert_held(&vmbus_connection.channel_mutex);
- if (sscanf(buf, "%uu", &target_cpu) != 1)
+ if (vmbus_proto_version < VERSION_WIN10_V4_1)
return -EIO;
/* Validate target_cpu for the cpumask_test_cpu() operation below. */
@@ -1602,22 +1660,17 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
if (!cpumask_test_cpu(target_cpu, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ)))
return -EINVAL;
- /* No CPUs should come up or down during this. */
- cpus_read_lock();
-
- if (!cpu_online(target_cpu)) {
- cpus_read_unlock();
+ if (!cpu_online(target_cpu))
return -EINVAL;
- }
/*
- * Synchronizes target_cpu_store() and channel closure:
+ * Synchronizes vmbus_channel_set_cpu() and channel closure:
*
* { Initially: state = CHANNEL_OPENED }
*
* CPU1 CPU2
*
- * [target_cpu_store()] [vmbus_disconnect_ring()]
+ * [vmbus_channel_set_cpu()] [vmbus_disconnect_ring()]
*
* LOCK channel_mutex LOCK channel_mutex
* LOAD r1 = state LOAD r2 = state
@@ -1632,7 +1685,6 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
* Note. The host processes the channel messages "sequentially", in
* the order in which they are received on a per-partition basis.
*/
- mutex_lock(&vmbus_connection.channel_mutex);
/*
* Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels;
@@ -1640,17 +1692,17 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
*/
if (channel->state != CHANNEL_OPENED_STATE) {
ret = -EIO;
- goto cpu_store_unlock;
+ goto end;
}
origin_cpu = channel->target_cpu;
if (target_cpu == origin_cpu)
- goto cpu_store_unlock;
+ goto end;
if (vmbus_send_modifychannel(channel,
hv_cpu_number_to_vp_number(target_cpu))) {
ret = -EIO;
- goto cpu_store_unlock;
+ goto end;
}
/*
@@ -1680,10 +1732,26 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
origin_cpu, target_cpu);
}
-cpu_store_unlock:
+end:
+ return ret;
+}
+
+static ssize_t target_cpu_store(struct vmbus_channel *channel,
+ const char *buf, size_t count)
+{
+ u32 target_cpu;
+ ssize_t ret;
+
+ if (sscanf(buf, "%uu", &target_cpu) != 1)
+ return -EIO;
+
+ cpus_read_lock();
+ mutex_lock(&vmbus_connection.channel_mutex);
+ ret = vmbus_channel_set_cpu(channel, target_cpu);
mutex_unlock(&vmbus_connection.channel_mutex);
cpus_read_unlock();
- return ret;
+
+ return ret ?: count;
}
static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store);
@@ -1764,6 +1832,26 @@ static ssize_t subchannel_id_show(struct vmbus_channel *channel,
}
static VMBUS_CHAN_ATTR_RO(subchannel_id);
+static int hv_mmap_ring_buffer_wrapper(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *attr,
+ struct vm_area_struct *vma)
+{
+ struct vmbus_channel *channel = container_of(kobj, struct vmbus_channel, kobj);
+
+ /*
+ * hv_(create|remove)_ring_sysfs implementation ensures that mmap_ring_buffer
+ * is not NULL.
+ */
+ return channel->mmap_ring_buffer(channel, vma);
+}
+
+static struct bin_attribute chan_attr_ring_buffer = {
+ .attr = {
+ .name = "ring",
+ .mode = 0600,
+ },
+ .mmap = hv_mmap_ring_buffer_wrapper,
+};
static struct attribute *vmbus_chan_attrs[] = {
&chan_attr_out_mask.attr,
&chan_attr_in_mask.attr,
@@ -1783,6 +1871,11 @@ static struct attribute *vmbus_chan_attrs[] = {
NULL
};
+static const struct bin_attribute *vmbus_chan_bin_attrs[] = {
+ &chan_attr_ring_buffer,
+ NULL
+};
+
/*
* Channel-level attribute_group callback function. Returns the permission for
* each attribute, and returns 0 if an attribute is not visible.
@@ -1803,16 +1896,99 @@ static umode_t vmbus_chan_attr_is_visible(struct kobject *kobj,
return attr->mode;
}
-static struct attribute_group vmbus_chan_group = {
+static umode_t vmbus_chan_bin_attr_is_visible(struct kobject *kobj,
+ const struct bin_attribute *attr, int idx)
+{
+ const struct vmbus_channel *channel =
+ container_of(kobj, struct vmbus_channel, kobj);
+
+ /* Hide ring attribute if channel's ring_sysfs_visible is set to false */
+ if (attr == &chan_attr_ring_buffer && !channel->ring_sysfs_visible)
+ return 0;
+
+ return attr->attr.mode;
+}
+
+static size_t vmbus_chan_bin_size(struct kobject *kobj,
+ const struct bin_attribute *bin_attr, int a)
+{
+ const struct vmbus_channel *channel =
+ container_of(kobj, struct vmbus_channel, kobj);
+
+ return channel->ringbuffer_pagecount << PAGE_SHIFT;
+}
+
+static const struct attribute_group vmbus_chan_group = {
.attrs = vmbus_chan_attrs,
- .is_visible = vmbus_chan_attr_is_visible
+ .bin_attrs = vmbus_chan_bin_attrs,
+ .is_visible = vmbus_chan_attr_is_visible,
+ .is_bin_visible = vmbus_chan_bin_attr_is_visible,
+ .bin_size = vmbus_chan_bin_size,
};
-static struct kobj_type vmbus_chan_ktype = {
+static const struct kobj_type vmbus_chan_ktype = {
.sysfs_ops = &vmbus_chan_sysfs_ops,
.release = vmbus_chan_release,
};
+/**
+ * hv_create_ring_sysfs() - create "ring" sysfs entry corresponding to ring buffers for a channel.
+ * @channel: Pointer to vmbus_channel structure
+ * @hv_mmap_ring_buffer: function pointer for initializing the function to be called on mmap of
+ * channel's "ring" sysfs node, which is for the ring buffer of that channel.
+ * Function pointer is of below type:
+ * int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel,
+ * struct vm_area_struct *vma))
+ * This has a pointer to the channel and a pointer to vm_area_struct,
+ * used for mmap, as arguments.
+ *
+ * Sysfs node for ring buffer of a channel is created along with other fields, however its
+ * visibility is disabled by default. Sysfs creation needs to be controlled when the use-case
+ * is running.
+ * For example, HV_NIC device is used either by uio_hv_generic or hv_netvsc at any given point of
+ * time, and "ring" sysfs is needed only when uio_hv_generic is bound to that device. To avoid
+ * exposing the ring buffer by default, this function is reponsible to enable visibility of
+ * ring for userspace to use.
+ * Note: Race conditions can happen with userspace and it is not encouraged to create new
+ * use-cases for this. This was added to maintain backward compatibility, while solving
+ * one of the race conditions in uio_hv_generic while creating sysfs. See comments with
+ * vmbus_add_dynid() and vmbus_device_register().
+ *
+ * Returns 0 on success or error code on failure.
+ */
+int hv_create_ring_sysfs(struct vmbus_channel *channel,
+ int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel,
+ struct vm_area_struct *vma))
+{
+ struct kobject *kobj = &channel->kobj;
+
+ channel->mmap_ring_buffer = hv_mmap_ring_buffer;
+ channel->ring_sysfs_visible = true;
+
+ return sysfs_update_group(kobj, &vmbus_chan_group);
+}
+EXPORT_SYMBOL_GPL(hv_create_ring_sysfs);
+
+/**
+ * hv_remove_ring_sysfs() - remove ring sysfs entry corresponding to ring buffers for a channel.
+ * @channel: Pointer to vmbus_channel structure
+ *
+ * Hide "ring" sysfs for a channel by changing its is_visible attribute and updating sysfs group.
+ *
+ * Returns 0 on success or error code on failure.
+ */
+int hv_remove_ring_sysfs(struct vmbus_channel *channel)
+{
+ struct kobject *kobj = &channel->kobj;
+ int ret;
+
+ channel->ring_sysfs_visible = false;
+ ret = sysfs_update_group(kobj, &vmbus_chan_group);
+ channel->mmap_ring_buffer = NULL;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(hv_remove_ring_sysfs);
+
/*
* vmbus_add_channel_kobj - setup a sub-directory under device/channels
*/
@@ -1892,7 +2068,7 @@ int vmbus_device_register(struct hv_device *child_device_obj)
&child_device_obj->channel->offermsg.offer.if_instance);
child_device_obj->device.bus = &hv_bus;
- child_device_obj->device.parent = hv_dev;
+ child_device_obj->device.parent = vmbus_root_device;
child_device_obj->device.release = vmbus_device_release;
child_device_obj->device.dma_parms = &child_device_obj->dma_parms;
@@ -1910,6 +2086,20 @@ int vmbus_device_register(struct hv_device *child_device_obj)
return ret;
}
+ /*
+ * If device_register() found a driver to assign to the device, the
+ * driver's probe function has already run at this point. If that
+ * probe function accesses or operates on the "channels" subdirectory
+ * in sysfs, those operations will have failed because the "channels"
+ * subdirectory doesn't exist until the code below runs. Or if the
+ * probe function creates a /dev entry, a user space program could
+ * find and open the /dev entry, and then create a race by accessing
+ * the "channels" subdirectory while the creation steps are in progress
+ * here. The race can't result in a kernel failure, but the user space
+ * program may get an error in accessing "channels" or its
+ * subdirectories. See also comments with vmbus_add_dynid() about a
+ * related race condition.
+ */
child_device_obj->channels_kset = kset_create_and_add("channels",
NULL, kobj);
if (!child_device_obj->channels_kset) {
@@ -1952,6 +2142,7 @@ void vmbus_device_unregister(struct hv_device *device_obj)
*/
device_unregister(&device_obj->device);
}
+EXPORT_SYMBOL_GPL(vmbus_device_unregister);
#ifdef CONFIG_ACPI
/*
@@ -2233,12 +2424,25 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size)
struct resource *iter;
mutex_lock(&hyperv_mmio_lock);
+
+ /*
+ * If all bytes of the MMIO range to be released are within the
+ * special case fb_mmio shadow region, skip releasing the shadow
+ * region since no corresponding __request_region() was done
+ * in vmbus_allocate_mmio().
+ */
+ if (fb_mmio && start >= fb_mmio->start &&
+ (start + size - 1 <= fb_mmio->end))
+ goto skip_shadow_release;
+
for (iter = hyperv_mmio; iter; iter = iter->sibling) {
if ((iter->start >= start + size) || (iter->end <= start))
continue;
__release_region(iter, start, size);
}
+
+skip_shadow_release:
release_mem_region(start, size);
mutex_unlock(&hyperv_mmio_lock);
@@ -2253,7 +2457,7 @@ static int vmbus_acpi_add(struct platform_device *pdev)
struct acpi_device *ancestor;
struct acpi_device *device = ACPI_COMPANION(&pdev->dev);
- hv_dev = &device->dev;
+ vmbus_root_device = &device->dev;
/*
* Older versions of Hyper-V for ARM64 fail to include the _CCA
@@ -2305,6 +2509,32 @@ static int vmbus_acpi_add(struct platform_device *pdev)
return 0;
}
#endif
+#ifndef HYPERVISOR_CALLBACK_VECTOR
+static int vmbus_set_irq(struct platform_device *pdev)
+{
+ struct irq_data *data;
+ int irq;
+ irq_hw_number_t hwirq;
+
+ irq = platform_get_irq(pdev, 0);
+ /* platform_get_irq() may not return 0. */
+ if (irq < 0)
+ return irq;
+
+ data = irq_get_irq_data(irq);
+ if (!data) {
+ pr_err("No interrupt data for VMBus virq %d\n", irq);
+ return -ENODEV;
+ }
+ hwirq = irqd_to_hwirq(data);
+
+ vmbus_irq = irq;
+ vmbus_interrupt = hwirq;
+ pr_debug("VMBus virq %d, hwirq %d\n", vmbus_irq, vmbus_interrupt);
+
+ return 0;
+}
+#endif
static int vmbus_device_add(struct platform_device *pdev)
{
@@ -2314,12 +2544,17 @@ static int vmbus_device_add(struct platform_device *pdev)
struct device_node *np = pdev->dev.of_node;
int ret;
- hv_dev = &pdev->dev;
+ vmbus_root_device = &pdev->dev;
ret = of_range_parser_init(&parser, np);
if (ret)
return ret;
+#ifndef HYPERVISOR_CALLBACK_VECTOR
+ ret = vmbus_set_irq(pdev);
+ if (ret)
+ return ret;
+#endif
for_each_of_range(&parser, &range) {
struct resource *res;
@@ -2398,11 +2633,6 @@ static int vmbus_bus_suspend(struct device *dev)
if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
wait_for_completion(&vmbus_connection.ready_for_suspend_event);
- if (atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0) {
- pr_err("Can not suspend due to a previous failed resuming\n");
- return -EBUSY;
- }
-
mutex_lock(&vmbus_connection.channel_mutex);
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
@@ -2427,22 +2657,18 @@ static int vmbus_bus_suspend(struct device *dev)
pr_err("Sub-channel not deleted!\n");
WARN_ON_ONCE(1);
}
-
- atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
}
mutex_unlock(&vmbus_connection.channel_mutex);
vmbus_initiate_unload(false);
- /* Reset the event for the next resume. */
- reinit_completion(&vmbus_connection.ready_for_resume_event);
-
return 0;
}
static int vmbus_bus_resume(struct device *dev)
{
+ struct vmbus_channel *channel;
struct vmbus_channel_msginfo *msginfo;
size_t msgsize;
int ret;
@@ -2473,13 +2699,23 @@ static int vmbus_bus_resume(struct device *dev)
if (ret != 0)
return ret;
- WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0);
-
vmbus_request_offers();
- if (wait_for_completion_timeout(
- &vmbus_connection.ready_for_resume_event, 10 * HZ) == 0)
- pr_err("Some vmbus device is missing after suspending?\n");
+ mutex_lock(&vmbus_connection.channel_mutex);
+ list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+ if (channel->offermsg.child_relid != INVALID_RELID)
+ continue;
+
+ /* hvsock channels are not expected to be present. */
+ if (is_hvsock_channel(channel))
+ continue;
+
+ pr_err("channel %pUl/%pUl not present after resume.\n",
+ &channel->offermsg.offer.if_type,
+ &channel->offermsg.offer.if_instance);
+ /* ToDo: Cleanup these channels here */
+ }
+ mutex_unlock(&vmbus_connection.channel_mutex);
/* Reset the event for the next suspend. */
reinit_completion(&vmbus_connection.ready_for_suspend_event);
@@ -2531,7 +2767,7 @@ static const struct dev_pm_ops vmbus_bus_pm = {
static struct platform_driver vmbus_platform_driver = {
.probe = vmbus_platform_driver_probe,
- .remove_new = vmbus_platform_driver_remove,
+ .remove = vmbus_platform_driver_remove,
.driver = {
.name = "vmbus",
.acpi_match_table = ACPI_PTR(vmbus_acpi_device_ids),
@@ -2616,7 +2852,7 @@ static int __init hv_acpi_init(void)
if (!hv_is_hyperv_initialized())
return -ENODEV;
- if (hv_root_partition && !hv_nested)
+ if (hv_root_partition() && !hv_nested)
return 0;
/*
@@ -2626,7 +2862,7 @@ static int __init hv_acpi_init(void)
if (ret)
return ret;
- if (!hv_dev) {
+ if (!vmbus_root_device) {
ret = -ENODEV;
goto cleanup;
}
@@ -2657,7 +2893,7 @@ static int __init hv_acpi_init(void)
cleanup:
platform_driver_unregister(&vmbus_platform_driver);
- hv_dev = NULL;
+ vmbus_root_device = NULL;
return ret;
}