diff options
Diffstat (limited to 'drivers/uio/uio_hv_generic.c')
-rw-r--r-- | drivers/uio/uio_hv_generic.c | 188 |
1 files changed, 107 insertions, 81 deletions
diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index 6be3462b109f..f19efad4d6f8 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -36,7 +36,6 @@ #define DRIVER_AUTHOR "Stephen Hemminger <sthemmin at microsoft.com>" #define DRIVER_DESC "Generic UIO driver for VMBus devices" -#define HV_RING_SIZE 512 /* pages */ #define SEND_BUFFER_SIZE (16 * 1024 * 1024) #define RECV_BUFFER_SIZE (31 * 1024 * 1024) @@ -66,6 +65,16 @@ struct hv_uio_private_data { char send_name[32]; }; +static void set_event(struct vmbus_channel *channel, s32 irq_state) +{ + channel->inbound.ring_buffer->interrupt_mask = !irq_state; + if (!channel->offermsg.monitor_allocated && irq_state) { + /* MB is needed for host to see the interrupt mask first */ + virt_mb(); + vmbus_set_event(channel); + } +} + /* * This is the irqcontrol callback to be registered to uio_info. * It can be used to disable/enable interrupt from user space processes. @@ -80,9 +89,15 @@ hv_uio_irqcontrol(struct uio_info *info, s32 irq_state) { struct hv_uio_private_data *pdata = info->priv; struct hv_device *dev = pdata->device; + struct vmbus_channel *primary, *sc; - dev->channel->inbound.ring_buffer->interrupt_mask = !irq_state; - virt_mb(); + primary = dev->channel; + set_event(primary, irq_state); + + mutex_lock(&vmbus_connection.channel_mutex); + list_for_each_entry(sc, &primary->sc_list, sc_list) + set_event(sc, irq_state); + mutex_unlock(&vmbus_connection.channel_mutex); return 0; } @@ -93,21 +108,29 @@ hv_uio_irqcontrol(struct uio_info *info, s32 irq_state) static void hv_uio_channel_cb(void *context) { struct vmbus_channel *chan = context; - struct hv_device *hv_dev = chan->device_obj; - struct hv_uio_private_data *pdata = hv_get_drvdata(hv_dev); + struct hv_device *hv_dev; + struct hv_uio_private_data *pdata; chan->inbound.ring_buffer->interrupt_mask = 1; virt_mb(); + /* + * The callback may come from a subchannel, in which case look + * for the hv device in the primary channel + */ + hv_dev = chan->primary_channel ? + chan->primary_channel->device_obj : chan->device_obj; + pdata = hv_get_drvdata(hv_dev); uio_event_notify(&pdata->info); } /* * Callback from vmbus_event when channel is rescinded. + * It is meant for rescind of primary channels only. */ static void hv_uio_rescind(struct vmbus_channel *channel) { - struct hv_device *hv_dev = channel->primary_channel->device_obj; + struct hv_device *hv_dev = channel->device_obj; struct hv_uio_private_data *pdata = hv_get_drvdata(hv_dev); /* @@ -118,17 +141,22 @@ static void hv_uio_rescind(struct vmbus_channel *channel) /* Wake up reader */ uio_event_notify(&pdata->info); + + /* + * With rescind callback registered, rescind path will not unregister the device + * from vmbus when the primary channel is rescinded. + * Without it, rescind handling is incomplete and next onoffer msg does not come. + * Unregister the device from vmbus here. + */ + vmbus_device_unregister(channel->device_obj); } -/* Sysfs API to allow mmap of the ring buffers +/* Function used for mmap of ring buffer sysfs interface. * The ring buffer is allocated as contiguous memory by vmbus_open */ -static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, - struct vm_area_struct *vma) +static int +hv_uio_ring_mmap(struct vmbus_channel *channel, struct vm_area_struct *vma) { - struct vmbus_channel *channel - = container_of(kobj, struct vmbus_channel, kobj); void *ring_buffer = page_address(channel->ringbuffer_page); if (channel->state != CHANNEL_OPENED_STATE) @@ -138,22 +166,13 @@ static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj, channel->ringbuffer_pagecount << PAGE_SHIFT); } -static const struct bin_attribute ring_buffer_bin_attr = { - .attr = { - .name = "ring", - .mode = 0600, - }, - .size = 2 * HV_RING_SIZE * PAGE_SIZE, - .mmap = hv_uio_ring_mmap, -}; - /* Callback from VMBUS subsystem when new channel created. */ static void hv_uio_new_channel(struct vmbus_channel *new_sc) { struct hv_device *hv_dev = new_sc->primary_channel->device_obj; struct device *device = &hv_dev->device; - const size_t ring_bytes = HV_RING_SIZE * PAGE_SIZE; + const size_t ring_bytes = SZ_2M; int ret; /* Create host communication ring */ @@ -167,8 +186,7 @@ hv_uio_new_channel(struct vmbus_channel *new_sc) /* Disable interrupts on sub channel */ new_sc->inbound.ring_buffer->interrupt_mask = 1; set_channel_read_mode(new_sc, HV_CALL_ISR); - - ret = sysfs_create_bin_file(&new_sc->kobj, &ring_buffer_bin_attr); + ret = hv_create_ring_sysfs(new_sc, hv_uio_ring_mmap); if (ret) { dev_err(device, "sysfs create ring bin file failed; %d\n", ret); vmbus_close(new_sc); @@ -240,19 +258,19 @@ hv_uio_probe(struct hv_device *dev, struct hv_uio_private_data *pdata; void *ring_buffer; int ret; + size_t ring_size = hv_dev_ring_size(channel); - /* Communicating with host has to be via shared memory not hypercall */ - if (!channel->offermsg.monitor_allocated) { - dev_err(&dev->device, "vmbus channel requires hypercall\n"); - return -ENOTSUPP; - } + if (!ring_size) + ring_size = SZ_2M; + + /* Adjust ring size if necessary to have it page aligned */ + ring_size = VMBUS_RING_SIZE(ring_size); pdata = devm_kzalloc(&dev->device, sizeof(*pdata), GFP_KERNEL); if (!pdata) return -ENOMEM; - ret = vmbus_alloc_ring(channel, HV_RING_SIZE * PAGE_SIZE, - HV_RING_SIZE * PAGE_SIZE); + ret = vmbus_alloc_ring(channel, ring_size, ring_size); if (ret) return ret; @@ -279,60 +297,60 @@ hv_uio_probe(struct hv_device *dev, pdata->info.mem[INT_PAGE_MAP].name = "int_page"; pdata->info.mem[INT_PAGE_MAP].addr = (uintptr_t)vmbus_connection.int_page; - pdata->info.mem[INT_PAGE_MAP].size = PAGE_SIZE; + pdata->info.mem[INT_PAGE_MAP].size = HV_HYP_PAGE_SIZE; pdata->info.mem[INT_PAGE_MAP].memtype = UIO_MEM_LOGICAL; pdata->info.mem[MON_PAGE_MAP].name = "monitor_page"; pdata->info.mem[MON_PAGE_MAP].addr = (uintptr_t)vmbus_connection.monitor_pages[1]; - pdata->info.mem[MON_PAGE_MAP].size = PAGE_SIZE; + pdata->info.mem[MON_PAGE_MAP].size = HV_HYP_PAGE_SIZE; pdata->info.mem[MON_PAGE_MAP].memtype = UIO_MEM_LOGICAL; - pdata->recv_buf = vzalloc(RECV_BUFFER_SIZE); - if (pdata->recv_buf == NULL) { - ret = -ENOMEM; - goto fail_free_ring; + if (channel->device_id == HV_NIC) { + pdata->recv_buf = vzalloc(RECV_BUFFER_SIZE); + if (!pdata->recv_buf) { + ret = -ENOMEM; + goto fail_free_ring; + } + + ret = vmbus_establish_gpadl(channel, pdata->recv_buf, + RECV_BUFFER_SIZE, &pdata->recv_gpadl); + if (ret) { + if (!pdata->recv_gpadl.decrypted) + vfree(pdata->recv_buf); + goto fail_close; + } + + /* put Global Physical Address Label in name */ + snprintf(pdata->recv_name, sizeof(pdata->recv_name), + "recv:%u", pdata->recv_gpadl.gpadl_handle); + pdata->info.mem[RECV_BUF_MAP].name = pdata->recv_name; + pdata->info.mem[RECV_BUF_MAP].addr = (uintptr_t)pdata->recv_buf; + pdata->info.mem[RECV_BUF_MAP].size = RECV_BUFFER_SIZE; + pdata->info.mem[RECV_BUF_MAP].memtype = UIO_MEM_VIRTUAL; + + pdata->send_buf = vzalloc(SEND_BUFFER_SIZE); + if (!pdata->send_buf) { + ret = -ENOMEM; + goto fail_close; + } + + ret = vmbus_establish_gpadl(channel, pdata->send_buf, + SEND_BUFFER_SIZE, &pdata->send_gpadl); + if (ret) { + if (!pdata->send_gpadl.decrypted) + vfree(pdata->send_buf); + goto fail_close; + } + + snprintf(pdata->send_name, sizeof(pdata->send_name), + "send:%u", pdata->send_gpadl.gpadl_handle); + pdata->info.mem[SEND_BUF_MAP].name = pdata->send_name; + pdata->info.mem[SEND_BUF_MAP].addr = (uintptr_t)pdata->send_buf; + pdata->info.mem[SEND_BUF_MAP].size = SEND_BUFFER_SIZE; + pdata->info.mem[SEND_BUF_MAP].memtype = UIO_MEM_VIRTUAL; } - ret = vmbus_establish_gpadl(channel, pdata->recv_buf, - RECV_BUFFER_SIZE, &pdata->recv_gpadl); - if (ret) { - if (!pdata->recv_gpadl.decrypted) - vfree(pdata->recv_buf); - goto fail_close; - } - - /* put Global Physical Address Label in name */ - snprintf(pdata->recv_name, sizeof(pdata->recv_name), - "recv:%u", pdata->recv_gpadl.gpadl_handle); - pdata->info.mem[RECV_BUF_MAP].name = pdata->recv_name; - pdata->info.mem[RECV_BUF_MAP].addr - = (uintptr_t)pdata->recv_buf; - pdata->info.mem[RECV_BUF_MAP].size = RECV_BUFFER_SIZE; - pdata->info.mem[RECV_BUF_MAP].memtype = UIO_MEM_VIRTUAL; - - pdata->send_buf = vzalloc(SEND_BUFFER_SIZE); - if (pdata->send_buf == NULL) { - ret = -ENOMEM; - goto fail_close; - } - - ret = vmbus_establish_gpadl(channel, pdata->send_buf, - SEND_BUFFER_SIZE, &pdata->send_gpadl); - if (ret) { - if (!pdata->send_gpadl.decrypted) - vfree(pdata->send_buf); - goto fail_close; - } - - snprintf(pdata->send_name, sizeof(pdata->send_name), - "send:%u", pdata->send_gpadl.gpadl_handle); - pdata->info.mem[SEND_BUF_MAP].name = pdata->send_name; - pdata->info.mem[SEND_BUF_MAP].addr - = (uintptr_t)pdata->send_buf; - pdata->info.mem[SEND_BUF_MAP].size = SEND_BUFFER_SIZE; - pdata->info.mem[SEND_BUF_MAP].memtype = UIO_MEM_VIRTUAL; - pdata->info.priv = pdata; pdata->device = dev; @@ -342,10 +360,18 @@ hv_uio_probe(struct hv_device *dev, goto fail_close; } - ret = sysfs_create_bin_file(&channel->kobj, &ring_buffer_bin_attr); - if (ret) - dev_notice(&dev->device, - "sysfs create ring bin file failed; %d\n", ret); + /* + * This internally calls sysfs_update_group, which returns a non-zero value if it executes + * before sysfs_create_group. This is expected as the 'ring' will be created later in + * vmbus_device_register() -> vmbus_add_channel_kobj(). Thus, no need to check the return + * value and print warning. + * + * Creating/exposing sysfs in driver probe is not encouraged as it can lead to race + * conditions with userspace. For backward compatibility, "ring" sysfs could not be removed + * or decoupled from uio_hv_generic probe. Userspace programs can make use of inotify + * APIs to make sure that ring is created. + */ + hv_create_ring_sysfs(channel, hv_uio_ring_mmap); hv_set_drvdata(dev, pdata); @@ -367,7 +393,7 @@ hv_uio_remove(struct hv_device *dev) if (!pdata) return; - sysfs_remove_bin_file(&dev->channel->kobj, &ring_buffer_bin_attr); + hv_remove_ring_sysfs(dev->channel); uio_unregister_device(&pdata->info); hv_uio_cleanup(dev, pdata); |