summaryrefslogtreecommitdiff
path: root/drivers/vhost
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-10 13:42:09 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-10 13:42:09 -0700
commit09102704c67457c6cdea6c0394c34843484a852c (patch)
treec4faeb56ff6c94d50b56da17969b88b27c19eae5 /drivers/vhost
parent84fc461db99b2dc19e019c0a97725a3653687981 (diff)
parent044e4b09223039e571e6ec540e25552054208765 (diff)
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin: - virtio-mem: paravirtualized memory hotplug - support doorbell mapping for vdpa - config interrupt support in ifc - fixes all over the place * tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (40 commits) vhost/test: fix up after API change virtio_mem: convert device block size into 64bit virtio-mem: drop unnecessary initialization ifcvf: implement config interrupt in IFCVF vhost: replace -1 with VHOST_FILE_UNBIND in ioctls vhost_vdpa: Support config interrupt in vdpa ifcvf: ignore continuous setting same status value virtio-mem: Don't rely on implicit compiler padding for requests virtio-mem: Try to unplug the complete online memory block first virtio-mem: Use -ETXTBSY as error code if the device is busy virtio-mem: Unplug subblocks right-to-left virtio-mem: Drop manual check for already present memory virtio-mem: Add parent resource for all added "System RAM" virtio-mem: Better retry handling virtio-mem: Offline and remove completely unplugged memory blocks mm/memory_hotplug: Introduce offline_and_remove_memory() virtio-mem: Allow to offline partially unplugged memory blocks mm: Allow to offline unmovable PageOffline() pages via MEM_GOING_OFFLINE virtio-mem: Paravirtualized memory hotunplug part 2 virtio-mem: Paravirtualized memory hotunplug part 1 ...
Diffstat (limited to 'drivers/vhost')
-rw-r--r--drivers/vhost/Kconfig17
-rw-r--r--drivers/vhost/net.c2
-rw-r--r--drivers/vhost/scsi.c2
-rw-r--r--drivers/vhost/test.c2
-rw-r--r--drivers/vhost/vdpa.c112
-rw-r--r--drivers/vhost/vhost.c98
-rw-r--r--drivers/vhost/vhost.h8
-rw-r--r--drivers/vhost/vringh.c6
-rw-r--r--drivers/vhost/vsock.c2
9 files changed, 195 insertions, 54 deletions
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
index c4f273793595..2c75d164b827 100644
--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
@@ -13,15 +13,6 @@ config VHOST_RING
This option is selected by any driver which needs to access
the host side of a virtio ring.
-config VHOST_DPN
- bool
- depends on !ARM || AEABI
- default y
- help
- Anything selecting VHOST or VHOST_RING must depend on VHOST_DPN.
- This excludes the deprecated ARM ABI since that forces a 4 byte
- alignment on all structs - incompatible with virtio spec requirements.
-
config VHOST
tristate
select VHOST_IOTLB
@@ -37,7 +28,7 @@ if VHOST_MENU
config VHOST_NET
tristate "Host kernel accelerator for virtio net"
- depends on NET && EVENTFD && (TUN || !TUN) && (TAP || !TAP) && VHOST_DPN
+ depends on NET && EVENTFD && (TUN || !TUN) && (TAP || !TAP)
select VHOST
---help---
This kernel module can be loaded in host kernel to accelerate
@@ -49,7 +40,7 @@ config VHOST_NET
config VHOST_SCSI
tristate "VHOST_SCSI TCM fabric driver"
- depends on TARGET_CORE && EVENTFD && VHOST_DPN
+ depends on TARGET_CORE && EVENTFD
select VHOST
default n
---help---
@@ -58,7 +49,7 @@ config VHOST_SCSI
config VHOST_VSOCK
tristate "vhost virtio-vsock driver"
- depends on VSOCKETS && EVENTFD && VHOST_DPN
+ depends on VSOCKETS && EVENTFD
select VHOST
select VIRTIO_VSOCKETS_COMMON
default n
@@ -72,7 +63,7 @@ config VHOST_VSOCK
config VHOST_VDPA
tristate "Vhost driver for vDPA-based backend"
- depends on EVENTFD && VHOST_DPN
+ depends on EVENTFD
select VHOST
depends on VDPA
help
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 516519dcc8ff..e992decfec53 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1327,7 +1327,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
}
vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
UIO_MAXIOV + VHOST_NET_BATCH,
- VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT,
+ VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, true,
NULL);
vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev);
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 8b104f76f324..6fb4d7ecfa19 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1628,7 +1628,7 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
}
vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV,
- VHOST_SCSI_WEIGHT, 0, NULL);
+ VHOST_SCSI_WEIGHT, 0, true, NULL);
vhost_scsi_init_inflight(vs, NULL);
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 9a3a09005e03..0466921f4772 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -120,7 +120,7 @@ static int vhost_test_open(struct inode *inode, struct file *f)
vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ];
n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick;
vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV,
- VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT, NULL);
+ VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT, true, NULL);
f->private_data = n;
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 40e2a5747c98..7580e34f76c1 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -15,12 +15,14 @@
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
+#include <linux/mm.h>
#include <linux/iommu.h>
#include <linux/uuid.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vhost.h>
#include <linux/virtio_net.h>
+#include <linux/kernel.h>
#include "vhost.h"
@@ -70,6 +72,7 @@ struct vhost_vdpa {
int nvqs;
int virtio_id;
int minor;
+ struct eventfd_ctx *config_ctx;
};
static DEFINE_IDA(vhost_vdpa_ida);
@@ -101,6 +104,17 @@ static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
return IRQ_HANDLED;
}
+static irqreturn_t vhost_vdpa_config_cb(void *private)
+{
+ struct vhost_vdpa *v = private;
+ struct eventfd_ctx *config_ctx = v->config_ctx;
+
+ if (config_ctx)
+ eventfd_signal(config_ctx, 1);
+
+ return IRQ_HANDLED;
+}
+
static void vhost_vdpa_reset(struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
@@ -288,6 +302,36 @@ static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
return 0;
}
+static void vhost_vdpa_config_put(struct vhost_vdpa *v)
+{
+ if (v->config_ctx)
+ eventfd_ctx_put(v->config_ctx);
+}
+
+static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
+{
+ struct vdpa_callback cb;
+ int fd;
+ struct eventfd_ctx *ctx;
+
+ cb.callback = vhost_vdpa_config_cb;
+ cb.private = v->vdpa;
+ if (copy_from_user(&fd, argp, sizeof(fd)))
+ return -EFAULT;
+
+ ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
+ swap(ctx, v->config_ctx);
+
+ if (!IS_ERR_OR_NULL(ctx))
+ eventfd_ctx_put(ctx);
+
+ if (IS_ERR(v->config_ctx))
+ return PTR_ERR(v->config_ctx);
+
+ v->vdpa->config->set_config_cb(v->vdpa, &cb);
+
+ return 0;
+}
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
void __user *argp)
{
@@ -395,6 +439,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
case VHOST_SET_LOG_FD:
r = -ENOIOCTLCMD;
break;
+ case VHOST_VDPA_SET_CONFIG_CALL:
+ r = vhost_vdpa_set_config_call(v, argp);
+ break;
default:
r = vhost_dev_ioctl(&v->vdev, cmd, argp);
if (r == -ENOIOCTLCMD)
@@ -694,7 +741,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
vqs[i] = &v->vqs[i];
vqs[i]->handle_kick = handle_vq_kick;
}
- vhost_dev_init(dev, vqs, nvqs, 0, 0, 0,
+ vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
vhost_vdpa_process_iotlb_msg);
dev->iotlb = vhost_iotlb_alloc(0, 0);
@@ -729,6 +776,7 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep)
vhost_dev_stop(&v->vdev);
vhost_vdpa_iotlb_free(v);
vhost_vdpa_free_domain(v);
+ vhost_vdpa_config_put(v);
vhost_dev_cleanup(&v->vdev);
kfree(v->vdev.vqs);
mutex_unlock(&d->mutex);
@@ -739,12 +787,74 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep)
return 0;
}
+#ifdef CONFIG_MMU
+static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
+{
+ struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+ struct vdpa_notification_area notify;
+ struct vm_area_struct *vma = vmf->vma;
+ u16 index = vma->vm_pgoff;
+
+ notify = ops->get_vq_notification(vdpa, index);
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
+ notify.addr >> PAGE_SHIFT, PAGE_SIZE,
+ vma->vm_page_prot))
+ return VM_FAULT_SIGBUS;
+
+ return VM_FAULT_NOPAGE;
+}
+
+static const struct vm_operations_struct vhost_vdpa_vm_ops = {
+ .fault = vhost_vdpa_fault,
+};
+
+static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct vhost_vdpa *v = vma->vm_file->private_data;
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+ struct vdpa_notification_area notify;
+ int index = vma->vm_pgoff;
+
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+ if ((vma->vm_flags & VM_SHARED) == 0)
+ return -EINVAL;
+ if (vma->vm_flags & VM_READ)
+ return -EINVAL;
+ if (index > 65535)
+ return -EINVAL;
+ if (!ops->get_vq_notification)
+ return -ENOTSUPP;
+
+ /* To be safe and easily modelled by userspace, We only
+ * support the doorbell which sits on the page boundary and
+ * does not share the page with other registers.
+ */
+ notify = ops->get_vq_notification(vdpa, index);
+ if (notify.addr & (PAGE_SIZE - 1))
+ return -EINVAL;
+ if (vma->vm_end - vma->vm_start != notify.size)
+ return -ENOTSUPP;
+
+ vma->vm_ops = &vhost_vdpa_vm_ops;
+ return 0;
+}
+#endif /* CONFIG_MMU */
+
static const struct file_operations vhost_vdpa_fops = {
.owner = THIS_MODULE,
.open = vhost_vdpa_open,
.release = vhost_vdpa_release,
.write_iter = vhost_vdpa_chr_write_iter,
.unlocked_ioctl = vhost_vdpa_unlocked_ioctl,
+#ifdef CONFIG_MMU
+ .mmap = vhost_vdpa_mmap,
+#endif /* CONFIG_MMU */
.compat_ioctl = compat_ptr_ioctl,
};
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 596132a96cd5..062595ee1f83 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -166,11 +166,16 @@ static int vhost_poll_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync,
void *key)
{
struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);
+ struct vhost_work *work = &poll->work;
if (!(key_to_poll(key) & poll->mask))
return 0;
- vhost_poll_queue(poll);
+ if (!poll->dev->use_worker)
+ work->fn(work);
+ else
+ vhost_poll_queue(poll);
+
return 0;
}
@@ -454,6 +459,7 @@ static size_t vhost_get_desc_size(struct vhost_virtqueue *vq,
void vhost_dev_init(struct vhost_dev *dev,
struct vhost_virtqueue **vqs, int nvqs,
int iov_limit, int weight, int byte_weight,
+ bool use_worker,
int (*msg_handler)(struct vhost_dev *dev,
struct vhost_iotlb_msg *msg))
{
@@ -471,6 +477,7 @@ void vhost_dev_init(struct vhost_dev *dev,
dev->iov_limit = iov_limit;
dev->weight = weight;
dev->byte_weight = byte_weight;
+ dev->use_worker = use_worker;
dev->msg_handler = msg_handler;
init_llist_head(&dev->work_list);
init_waitqueue_head(&dev->wait);
@@ -534,6 +541,36 @@ bool vhost_dev_has_owner(struct vhost_dev *dev)
}
EXPORT_SYMBOL_GPL(vhost_dev_has_owner);
+static void vhost_attach_mm(struct vhost_dev *dev)
+{
+ /* No owner, become one */
+ if (dev->use_worker) {
+ dev->mm = get_task_mm(current);
+ } else {
+ /* vDPA device does not use worker thead, so there's
+ * no need to hold the address space for mm. This help
+ * to avoid deadlock in the case of mmap() which may
+ * held the refcnt of the file and depends on release
+ * method to remove vma.
+ */
+ dev->mm = current->mm;
+ mmgrab(dev->mm);
+ }
+}
+
+static void vhost_detach_mm(struct vhost_dev *dev)
+{
+ if (!dev->mm)
+ return;
+
+ if (dev->use_worker)
+ mmput(dev->mm);
+ else
+ mmdrop(dev->mm);
+
+ dev->mm = NULL;
+}
+
/* Caller should have device mutex */
long vhost_dev_set_owner(struct vhost_dev *dev)
{
@@ -546,21 +583,24 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
goto err_mm;
}
- /* No owner, become one */
- dev->mm = get_task_mm(current);
+ vhost_attach_mm(dev);
+
dev->kcov_handle = kcov_common_handle();
- worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
- if (IS_ERR(worker)) {
- err = PTR_ERR(worker);
- goto err_worker;
- }
+ if (dev->use_worker) {
+ worker = kthread_create(vhost_worker, dev,
+ "vhost-%d", current->pid);
+ if (IS_ERR(worker)) {
+ err = PTR_ERR(worker);
+ goto err_worker;
+ }
- dev->worker = worker;
- wake_up_process(worker); /* avoid contributing to loadavg */
+ dev->worker = worker;
+ wake_up_process(worker); /* avoid contributing to loadavg */
- err = vhost_attach_cgroups(dev);
- if (err)
- goto err_cgroup;
+ err = vhost_attach_cgroups(dev);
+ if (err)
+ goto err_cgroup;
+ }
err = vhost_dev_alloc_iovecs(dev);
if (err)
@@ -568,12 +608,12 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
return 0;
err_cgroup:
- kthread_stop(worker);
- dev->worker = NULL;
+ if (dev->worker) {
+ kthread_stop(dev->worker);
+ dev->worker = NULL;
+ }
err_worker:
- if (dev->mm)
- mmput(dev->mm);
- dev->mm = NULL;
+ vhost_detach_mm(dev);
dev->kcov_handle = 0;
err_mm:
return err;
@@ -670,9 +710,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
dev->worker = NULL;
dev->kcov_handle = 0;
}
- if (dev->mm)
- mmput(dev->mm);
- dev->mm = NULL;
+ vhost_detach_mm(dev);
}
EXPORT_SYMBOL_GPL(vhost_dev_cleanup);
@@ -882,7 +920,7 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
#define vhost_put_user(vq, x, ptr) \
({ \
- int ret = -EFAULT; \
+ int ret; \
if (!vq->iotlb) { \
ret = __put_user(x, ptr); \
} else { \
@@ -1244,9 +1282,9 @@ static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access)
}
static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
- struct vring_desc __user *desc,
- struct vring_avail __user *avail,
- struct vring_used __user *used)
+ vring_desc_t __user *desc,
+ vring_avail_t __user *avail,
+ vring_used_t __user *used)
{
return access_ok(desc, vhost_get_desc_size(vq, num)) &&
@@ -1574,7 +1612,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
r = -EFAULT;
break;
}
- eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd);
+ eventfp = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_fget(f.fd);
if (IS_ERR(eventfp)) {
r = PTR_ERR(eventfp);
break;
@@ -1590,7 +1628,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
r = -EFAULT;
break;
}
- ctx = f.fd == -1 ? NULL : eventfd_ctx_fdget(f.fd);
+ ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd);
if (IS_ERR(ctx)) {
r = PTR_ERR(ctx);
break;
@@ -1602,7 +1640,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
r = -EFAULT;
break;
}
- ctx = f.fd == -1 ? NULL : eventfd_ctx_fdget(f.fd);
+ ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd);
if (IS_ERR(ctx)) {
r = PTR_ERR(ctx);
break;
@@ -1727,7 +1765,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
r = get_user(fd, (int __user *)argp);
if (r < 0)
break;
- ctx = fd == -1 ? NULL : eventfd_ctx_fdget(fd);
+ ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
if (IS_ERR(ctx)) {
r = PTR_ERR(ctx);
break;
@@ -2300,7 +2338,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
struct vring_used_elem *heads,
unsigned count)
{
- struct vring_used_elem __user *used;
+ vring_used_elem_t __user *used;
u16 old, new;
int start;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index f8403bd46b85..c8e96a095d3b 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -67,9 +67,9 @@ struct vhost_virtqueue {
/* The actual ring of buffers. */
struct mutex mutex;
unsigned int num;
- struct vring_desc __user *desc;
- struct vring_avail __user *avail;
- struct vring_used __user *used;
+ vring_desc_t __user *desc;
+ vring_avail_t __user *avail;
+ vring_used_t __user *used;
const struct vhost_iotlb_map *meta_iotlb[VHOST_NUM_ADDRS];
struct file *kick;
struct eventfd_ctx *call_ctx;
@@ -154,6 +154,7 @@ struct vhost_dev {
int weight;
int byte_weight;
u64 kcov_handle;
+ bool use_worker;
int (*msg_handler)(struct vhost_dev *dev,
struct vhost_iotlb_msg *msg);
};
@@ -161,6 +162,7 @@ struct vhost_dev {
bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len);
void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs,
int nvqs, int iov_limit, int weight, int byte_weight,
+ bool use_worker,
int (*msg_handler)(struct vhost_dev *dev,
struct vhost_iotlb_msg *msg));
long vhost_dev_set_owner(struct vhost_dev *dev);
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index ba8e0d6cfd97..e059a9a47cdf 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -620,9 +620,9 @@ static inline int xfer_to_user(const struct vringh *vrh,
*/
int vringh_init_user(struct vringh *vrh, u64 features,
unsigned int num, bool weak_barriers,
- struct vring_desc __user *desc,
- struct vring_avail __user *avail,
- struct vring_used __user *used)
+ vring_desc_t __user *desc,
+ vring_avail_t __user *avail,
+ vring_used_t __user *used)
{
/* Sane power of 2 please! */
if (!num || num > 0xffff || (num & (num - 1))) {
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index fb4e944c4d0d..a483cec31d5c 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -632,7 +632,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
- VHOST_VSOCK_WEIGHT, NULL);
+ VHOST_VSOCK_WEIGHT, true, NULL);
file->private_data = vsock;
spin_lock_init(&vsock->send_pkt_list_lock);