Diffstat (limited to 'drivers/vhost/vdpa.c')
-rw-r--r--   drivers/vhost/vdpa.c   1075
1 file changed, 884 insertions, 191 deletions
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 7580e34f76c1..05a481e4c385 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -16,72 +16,152 @@ #include <linux/cdev.h> #include <linux/device.h> #include <linux/mm.h> +#include <linux/slab.h> #include <linux/iommu.h> #include <linux/uuid.h> #include <linux/vdpa.h> #include <linux/nospec.h> #include <linux/vhost.h> -#include <linux/virtio_net.h> -#include <linux/kernel.h> #include "vhost.h" enum { - VHOST_VDPA_FEATURES = - (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | - (1ULL << VIRTIO_F_ANY_LAYOUT) | - (1ULL << VIRTIO_F_VERSION_1) | - (1ULL << VIRTIO_F_IOMMU_PLATFORM) | - (1ULL << VIRTIO_F_RING_PACKED) | - (1ULL << VIRTIO_F_ORDER_PLATFORM) | - (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | - (1ULL << VIRTIO_RING_F_EVENT_IDX), - - VHOST_VDPA_NET_FEATURES = VHOST_VDPA_FEATURES | - (1ULL << VIRTIO_NET_F_CSUM) | - (1ULL << VIRTIO_NET_F_GUEST_CSUM) | - (1ULL << VIRTIO_NET_F_MTU) | - (1ULL << VIRTIO_NET_F_MAC) | - (1ULL << VIRTIO_NET_F_GUEST_TSO4) | - (1ULL << VIRTIO_NET_F_GUEST_TSO6) | - (1ULL << VIRTIO_NET_F_GUEST_ECN) | - (1ULL << VIRTIO_NET_F_GUEST_UFO) | - (1ULL << VIRTIO_NET_F_HOST_TSO4) | - (1ULL << VIRTIO_NET_F_HOST_TSO6) | - (1ULL << VIRTIO_NET_F_HOST_ECN) | - (1ULL << VIRTIO_NET_F_HOST_UFO) | - (1ULL << VIRTIO_NET_F_MRG_RXBUF) | - (1ULL << VIRTIO_NET_F_STATUS) | - (1ULL << VIRTIO_NET_F_SPEED_DUPLEX), + VHOST_VDPA_BACKEND_FEATURES = + (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) | + (1ULL << VHOST_BACKEND_F_IOTLB_BATCH) | + (1ULL << VHOST_BACKEND_F_IOTLB_ASID), }; -/* Currently, only network backend w/o multiqueue is supported. */ -#define VHOST_VDPA_VQ_MAX 2 - #define VHOST_VDPA_DEV_MAX (1U << MINORBITS) +#define VHOST_VDPA_IOTLB_BUCKETS 16 + +struct vhost_vdpa_as { + struct hlist_node hash_link; + struct vhost_iotlb iotlb; + u32 id; +}; + struct vhost_vdpa { struct vhost_dev vdev; struct iommu_domain *domain; struct vhost_virtqueue *vqs; struct completion completion; struct vdpa_device *vdpa; + struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS]; struct device dev; struct cdev cdev; atomic_t opened; - int nvqs; + u32 nvqs; int virtio_id; int minor; struct eventfd_ctx *config_ctx; + int in_batch; + struct vdpa_iova_range range; + u32 batch_asid; + bool suspended; }; static DEFINE_IDA(vhost_vdpa_ida); static dev_t vhost_vdpa_major; -static const u64 vhost_vdpa_features[] = { - [VIRTIO_ID_NET] = VHOST_VDPA_NET_FEATURES, -}; +static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, u64 start, + u64 last, u32 asid); + +static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb) +{ + struct vhost_vdpa_as *as = container_of(iotlb, struct + vhost_vdpa_as, iotlb); + return as->id; +} + +static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid) +{ + struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS]; + struct vhost_vdpa_as *as; + + hlist_for_each_entry(as, head, hash_link) + if (as->id == asid) + return as; + + return NULL; +} + +static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid) +{ + struct vhost_vdpa_as *as = asid_to_as(v, asid); + + if (!as) + return NULL; + + return &as->iotlb; +} + +static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid) +{ + struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS]; + struct vhost_vdpa_as *as; + + if (asid_to_as(v, asid)) + return NULL; + + if (asid >= v->vdpa->nas) + return NULL; + + as = kmalloc(sizeof(*as), GFP_KERNEL); + if (!as) + return NULL; + + vhost_iotlb_init(&as->iotlb, 0, 0); + as->id 
= asid; + hlist_add_head(&as->hash_link, head); + + return as; +} + +static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v, + u32 asid) +{ + struct vhost_vdpa_as *as = asid_to_as(v, asid); + + if (as) + return as; + + return vhost_vdpa_alloc_as(v, asid); +} + +static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (ops->reset_map) + ops->reset_map(vdpa, asid); +} + +static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) +{ + struct vhost_vdpa_as *as = asid_to_as(v, asid); + + if (!as) + return -EINVAL; + + hlist_del(&as->hash_link); + vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid); + /* + * Devices with vendor specific IOMMU may need to restore + * iotlb to the initial or default state, which cannot be + * cleaned up in the all range unmap call above. Give them + * a chance to clean up or reset the map to the desired + * state. + */ + vhost_vdpa_reset_map(v, asid); + kfree(as); + + return 0; +} static void handle_vq_kick(struct vhost_work *work) { @@ -96,10 +176,10 @@ static void handle_vq_kick(struct vhost_work *work) static irqreturn_t vhost_vdpa_virtqueue_cb(void *private) { struct vhost_virtqueue *vq = private; - struct eventfd_ctx *call_ctx = vq->call_ctx; + struct eventfd_ctx *call_ctx = vq->call_ctx.ctx; if (call_ctx) - eventfd_signal(call_ctx, 1); + eventfd_signal(call_ctx); return IRQ_HANDLED; } @@ -110,17 +190,84 @@ static irqreturn_t vhost_vdpa_config_cb(void *private) struct eventfd_ctx *config_ctx = v->config_ctx; if (config_ctx) - eventfd_signal(config_ctx, 1); + eventfd_signal(config_ctx); return IRQ_HANDLED; } -static void vhost_vdpa_reset(struct vhost_vdpa *v) +static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid) +{ + struct vhost_virtqueue *vq = &v->vqs[qid]; + const struct vdpa_config_ops *ops = v->vdpa->config; + struct vdpa_device *vdpa = v->vdpa; + int ret, irq; + + if (!ops->get_vq_irq) + return; + + irq = ops->get_vq_irq(vdpa, qid); + if (irq < 0) + return; + + if (!vq->call_ctx.ctx) + return; + + ret = irq_bypass_register_producer(&vq->call_ctx.producer, + vq->call_ctx.ctx, irq); + if (unlikely(ret)) + dev_info(&v->dev, "vq %u, irq bypass producer (eventfd %p) registration fails, ret = %d\n", + qid, vq->call_ctx.ctx, ret); +} + +static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid) +{ + struct vhost_virtqueue *vq = &v->vqs[qid]; + + irq_bypass_unregister_producer(&vq->call_ctx.producer); +} + +static int _compat_vdpa_reset(struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + u32 flags = 0; + + v->suspended = false; + + if (v->vdev.vqs) { + flags |= !vhost_backend_has_feature(v->vdev.vqs[0], + VHOST_BACKEND_F_IOTLB_PERSIST) ? 
+ VDPA_RESET_F_CLEAN_MAP : 0; + } + + return vdpa_reset(vdpa, flags); +} + +static int vhost_vdpa_reset(struct vhost_vdpa *v) +{ + v->in_batch = 0; + return _compat_vdpa_reset(v); +} + +static long vhost_vdpa_bind_mm(struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (!vdpa->use_va || !ops->bind_mm) + return 0; + + return ops->bind_mm(vdpa, v->vdev.mm); +} + +static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; - ops->set_status(vdpa, 0); + if (!vdpa->use_va || !ops->unbind_mm) + return; + + ops->unbind_mm(vdpa); } static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp) @@ -155,19 +302,37 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; - u8 status; + u8 status, status_old; + u32 nvqs = v->nvqs; + int ret; + u16 i; if (copy_from_user(&status, statusp, sizeof(status))) return -EFAULT; + status_old = ops->get_status(vdpa); + /* * Userspace shouldn't remove status bits unless reset the * status to 0. */ - if (status != 0 && (ops->get_status(vdpa) & ~status) != 0) + if (status != 0 && (status_old & ~status) != 0) return -EINVAL; - ops->set_status(vdpa, status); + if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK)) + for (i = 0; i < nvqs; i++) + vhost_vdpa_unsetup_vq_irq(v, i); + + if (status == 0) { + ret = _compat_vdpa_reset(v); + if (ret) + return ret; + } else + vdpa_set_status(vdpa, status); + + if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) + for (i = 0; i < nvqs; i++) + vhost_vdpa_setup_vq_irq(v, i); return 0; } @@ -175,15 +340,10 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) static int vhost_vdpa_config_validate(struct vhost_vdpa *v, struct vhost_vdpa_config *c) { - long size = 0; - - switch (v->virtio_id) { - case VIRTIO_ID_NET: - size = sizeof(struct virtio_net_config); - break; - } + struct vdpa_device *vdpa = v->vdpa; + size_t size = vdpa->config->get_config_size(vdpa); - if (c->len == 0) + if (c->len == 0 || c->off > size) return -EINVAL; if (c->len > size - c->off) @@ -196,7 +356,6 @@ static long vhost_vdpa_get_config(struct vhost_vdpa *v, struct vhost_vdpa_config __user *c) { struct vdpa_device *vdpa = v->vdpa; - const struct vdpa_config_ops *ops = vdpa->config; struct vhost_vdpa_config config; unsigned long size = offsetof(struct vhost_vdpa_config, buf); u8 *buf; @@ -209,7 +368,7 @@ static long vhost_vdpa_get_config(struct vhost_vdpa *v, if (!buf) return -ENOMEM; - ops->get_config(vdpa, config.off, buf, config.len); + vdpa_get_config(vdpa, config.off, buf, config.len); if (copy_to_user(c->buf, buf, config.len)) { kvfree(buf); @@ -224,7 +383,6 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, struct vhost_vdpa_config __user *c) { struct vdpa_device *vdpa = v->vdpa; - const struct vdpa_config_ops *ops = vdpa->config; struct vhost_vdpa_config config; unsigned long size = offsetof(struct vhost_vdpa_config, buf); u8 *buf; @@ -233,29 +391,48 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, return -EFAULT; if (vhost_vdpa_config_validate(v, &config)) return -EINVAL; - buf = kvzalloc(config.len, GFP_KERNEL); - if (!buf) - return -ENOMEM; - if (copy_from_user(buf, c->buf, config.len)) { - kvfree(buf); - return -EFAULT; - } + buf = vmemdup_user(c->buf, config.len); + if 
(IS_ERR(buf)) + return PTR_ERR(buf); - ops->set_config(vdpa, config.off, buf, config.len); + vdpa_set_config(vdpa, config.off, buf, config.len); kvfree(buf); return 0; } +static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + return ops->suspend; +} + +static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + return ops->resume; +} + +static bool vhost_vdpa_has_desc_group(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + return ops->get_vq_desc_group; +} + static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; u64 features; - features = ops->get_features(vdpa); - features &= vhost_vdpa_features[v->virtio_id]; + features = ops->get_device_features(vdpa); if (copy_to_user(featurep, &features, sizeof(features))) return -EFAULT; @@ -263,11 +440,34 @@ static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) return 0; } +static u64 vhost_vdpa_get_backend_features(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (!ops->get_backend_features) + return 0; + else + return ops->get_backend_features(vdpa); +} + +static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + return (!ops->set_map && !ops->dma_map) || ops->reset_map || + vhost_vdpa_get_backend_features(v) & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST); +} + static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + struct vhost_dev *d = &v->vdev; + u64 actual_features; u64 features; + int i; /* * It's not allowed to change the features after they have @@ -279,11 +479,18 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) if (copy_from_user(&features, featurep, sizeof(features))) return -EFAULT; - if (features & ~vhost_vdpa_features[v->virtio_id]) + if (vdpa_set_features(vdpa, features)) return -EINVAL; - if (ops->set_features(vdpa, features)) - return -EINVAL; + /* let the vqs know what has been configured */ + actual_features = ops->get_driver_features(vdpa); + for (i = 0; i < d->nvqs; ++i) { + struct vhost_virtqueue *vq = d->vqs[i]; + + mutex_lock(&vq->mutex); + vq->acked_features = actual_features; + mutex_unlock(&vq->mutex); + } return 0; } @@ -304,8 +511,10 @@ static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp) static void vhost_vdpa_config_put(struct vhost_vdpa *v) { - if (v->config_ctx) + if (v->config_ctx) { eventfd_ctx_put(v->config_ctx); + v->config_ctx = NULL; + } } static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) @@ -315,7 +524,7 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) struct eventfd_ctx *ctx; cb.callback = vhost_vdpa_config_cb; - cb.private = v->vdpa; + cb.private = v; if (copy_from_user(&fd, argp, sizeof(fd))) return -EFAULT; @@ -325,18 +534,107 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) if (!IS_ERR_OR_NULL(ctx)) eventfd_ctx_put(ctx); - if (IS_ERR(v->config_ctx)) - 
return PTR_ERR(v->config_ctx); + if (IS_ERR(v->config_ctx)) { + long ret = PTR_ERR(v->config_ctx); + + v->config_ctx = NULL; + return ret; + } v->vdpa->config->set_config_cb(v->vdpa, &cb); return 0; } + +static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp) +{ + struct vhost_vdpa_iova_range range = { + .first = v->range.first, + .last = v->range.last, + }; + + if (copy_to_user(argp, &range, sizeof(range))) + return -EFAULT; + return 0; +} + +static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + u32 size; + + size = ops->get_config_size(vdpa); + + if (copy_to_user(argp, &size, sizeof(size))) + return -EFAULT; + + return 0; +} + +static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp) +{ + struct vdpa_device *vdpa = v->vdpa; + + if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs))) + return -EFAULT; + + return 0; +} + +/* After a successful return of ioctl the device must not process more + * virtqueue descriptors. The device can answer to read or writes of config + * fields as if it were not suspended. In particular, writing to "queue_enable" + * with a value of 1 will not make the device start processing buffers. + */ +static long vhost_vdpa_suspend(struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + int ret; + + if (!(ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK)) + return 0; + + if (!ops->suspend) + return -EOPNOTSUPP; + + ret = ops->suspend(vdpa); + if (!ret) + v->suspended = true; + + return ret; +} + +/* After a successful return of this ioctl the device resumes processing + * virtqueue descriptors. The device becomes fully operational the same way it + * was before it was suspended. 
+ */ +static long vhost_vdpa_resume(struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + int ret; + + if (!(ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK)) + return 0; + + if (!ops->resume) + return -EOPNOTSUPP; + + ret = ops->resume(vdpa); + if (!ret) + v->suspended = false; + + return ret; +} + static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, void __user *argp) { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + struct vdpa_vq_state vq_state; struct vdpa_callback cb; struct vhost_virtqueue *vq; struct vhost_vring_state s; @@ -353,22 +651,80 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, idx = array_index_nospec(idx, v->nvqs); vq = &v->vqs[idx]; - if (cmd == VHOST_VDPA_SET_VRING_ENABLE) { + switch (cmd) { + case VHOST_VDPA_SET_VRING_ENABLE: if (copy_from_user(&s, argp, sizeof(s))) return -EFAULT; ops->set_vq_ready(vdpa, idx, s.num); return 0; + case VHOST_VDPA_GET_VRING_GROUP: + if (!ops->get_vq_group) + return -EOPNOTSUPP; + s.index = idx; + s.num = ops->get_vq_group(vdpa, idx); + if (s.num >= vdpa->ngroups) + return -EIO; + else if (copy_to_user(argp, &s, sizeof(s))) + return -EFAULT; + return 0; + case VHOST_VDPA_GET_VRING_DESC_GROUP: + if (!vhost_vdpa_has_desc_group(v)) + return -EOPNOTSUPP; + s.index = idx; + s.num = ops->get_vq_desc_group(vdpa, idx); + if (s.num >= vdpa->ngroups) + return -EIO; + else if (copy_to_user(argp, &s, sizeof(s))) + return -EFAULT; + return 0; + case VHOST_VDPA_SET_GROUP_ASID: + if (copy_from_user(&s, argp, sizeof(s))) + return -EFAULT; + if (s.num >= vdpa->nas) + return -EINVAL; + if (!ops->set_group_asid) + return -EOPNOTSUPP; + return ops->set_group_asid(vdpa, idx, s.num); + case VHOST_VDPA_GET_VRING_SIZE: + if (!ops->get_vq_size) + return -EOPNOTSUPP; + s.index = idx; + s.num = ops->get_vq_size(vdpa, idx); + if (copy_to_user(argp, &s, sizeof(s))) + return -EFAULT; + return 0; + case VHOST_GET_VRING_BASE: + r = ops->get_vq_state(v->vdpa, idx, &vq_state); + if (r) + return r; + + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { + vq->last_avail_idx = vq_state.packed.last_avail_idx | + (vq_state.packed.last_avail_counter << 15); + vq->last_used_idx = vq_state.packed.last_used_idx | + (vq_state.packed.last_used_counter << 15); + } else { + vq->last_avail_idx = vq_state.split.avail_index; + } + break; + case VHOST_SET_VRING_CALL: + if (vq->call_ctx.ctx) { + if (ops->get_status(vdpa) & + VIRTIO_CONFIG_S_DRIVER_OK) + vhost_vdpa_unsetup_vq_irq(v, idx); + } + break; } - if (cmd == VHOST_GET_VRING_BASE) - vq->last_avail_idx = ops->get_vq_state(v->vdpa, idx); - r = vhost_vring_ioctl(&v->vdev, cmd, argp); if (r) return r; switch (cmd) { case VHOST_SET_VRING_ADDR: + if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended) + return -EINVAL; + if (ops->set_vq_address(vdpa, idx, (u64)(uintptr_t)vq->desc, (u64)(uintptr_t)vq->avail, @@ -377,17 +733,32 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, break; case VHOST_SET_VRING_BASE: - if (ops->set_vq_state(vdpa, idx, vq->last_avail_idx)) - r = -EINVAL; + if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended) + return -EINVAL; + + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { + vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff; + vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000); + vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff; + 
vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000); + } else { + vq_state.split.avail_index = vq->last_avail_idx; + } + r = ops->set_vq_state(vdpa, idx, &vq_state); break; case VHOST_SET_VRING_CALL: - if (vq->call_ctx) { + if (vq->call_ctx.ctx) { cb.callback = vhost_vdpa_virtqueue_cb; cb.private = vq; + cb.trigger = vq->call_ctx.ctx; + if (ops->get_status(vdpa) & + VIRTIO_CONFIG_S_DRIVER_OK) + vhost_vdpa_setup_vq_irq(v, idx); } else { cb.callback = NULL; cb.private = NULL; + cb.trigger = NULL; } ops->set_vq_cb(vdpa, idx, &cb); break; @@ -406,7 +777,38 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, struct vhost_vdpa *v = filep->private_data; struct vhost_dev *d = &v->vdev; void __user *argp = (void __user *)arg; - long r; + u64 __user *featurep = argp; + u64 features; + long r = 0; + + if (cmd == VHOST_SET_BACKEND_FEATURES) { + if (copy_from_user(&features, featurep, sizeof(features))) + return -EFAULT; + if (features & ~(VHOST_VDPA_BACKEND_FEATURES | + BIT_ULL(VHOST_BACKEND_F_DESC_ASID) | + BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST) | + BIT_ULL(VHOST_BACKEND_F_SUSPEND) | + BIT_ULL(VHOST_BACKEND_F_RESUME) | + BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK))) + return -EOPNOTSUPP; + if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) && + !vhost_vdpa_can_suspend(v)) + return -EOPNOTSUPP; + if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) && + !vhost_vdpa_can_resume(v)) + return -EOPNOTSUPP; + if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) && + !(features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) + return -EINVAL; + if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) && + !vhost_vdpa_has_desc_group(v)) + return -EOPNOTSUPP; + if ((features & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST)) && + !vhost_vdpa_has_persistent_map(v)) + return -EOPNOTSUPP; + vhost_set_backend_features(&v->vdev, features); + return 0; + } mutex_lock(&d->mutex); @@ -435,6 +837,15 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, case VHOST_VDPA_GET_VRING_NUM: r = vhost_vdpa_get_vring_num(v, argp); break; + case VHOST_VDPA_GET_GROUP_NUM: + if (copy_to_user(argp, &v->vdpa->ngroups, + sizeof(v->vdpa->ngroups))) + r = -EFAULT; + break; + case VHOST_VDPA_GET_AS_NUM: + if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas))) + r = -EFAULT; + break; case VHOST_SET_LOG_BASE: case VHOST_SET_LOG_FD: r = -ENOIOCTLCMD; @@ -442,6 +853,35 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, case VHOST_VDPA_SET_CONFIG_CALL: r = vhost_vdpa_set_config_call(v, argp); break; + case VHOST_GET_BACKEND_FEATURES: + features = VHOST_VDPA_BACKEND_FEATURES; + if (vhost_vdpa_can_suspend(v)) + features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND); + if (vhost_vdpa_can_resume(v)) + features |= BIT_ULL(VHOST_BACKEND_F_RESUME); + if (vhost_vdpa_has_desc_group(v)) + features |= BIT_ULL(VHOST_BACKEND_F_DESC_ASID); + if (vhost_vdpa_has_persistent_map(v)) + features |= BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST); + features |= vhost_vdpa_get_backend_features(v); + if (copy_to_user(featurep, &features, sizeof(features))) + r = -EFAULT; + break; + case VHOST_VDPA_GET_IOVA_RANGE: + r = vhost_vdpa_get_iova_range(v, argp); + break; + case VHOST_VDPA_GET_CONFIG_SIZE: + r = vhost_vdpa_get_config_size(v, argp); + break; + case VHOST_VDPA_GET_VQS_COUNT: + r = vhost_vdpa_get_vqs_count(v, argp); + break; + case VHOST_VDPA_SUSPEND: + r = vhost_vdpa_suspend(v); + break; + case VHOST_VDPA_RESUME: + r = vhost_vdpa_resume(v); + break; default: r = vhost_dev_ioctl(&v->vdev, cmd, argp); if (r == -ENOIOCTLCMD) @@ -449,39 +889,80 @@ static long 
vhost_vdpa_unlocked_ioctl(struct file *filep, break; } + if (r) + goto out; + + switch (cmd) { + case VHOST_SET_OWNER: + r = vhost_vdpa_bind_mm(v); + if (r) + vhost_dev_reset_owner(d, NULL); + break; + } +out: mutex_unlock(&d->mutex); return r; } +static void vhost_vdpa_general_unmap(struct vhost_vdpa *v, + struct vhost_iotlb_map *map, u32 asid) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + if (ops->dma_map) { + ops->dma_unmap(vdpa, asid, map->start, map->size); + } else if (ops->set_map == NULL) { + iommu_unmap(v->domain, map->start, map->size); + } +} -static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) +static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, + u64 start, u64 last, u32 asid) { struct vhost_dev *dev = &v->vdev; - struct vhost_iotlb *iotlb = dev->iotlb; struct vhost_iotlb_map *map; struct page *page; unsigned long pfn, pinned; while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) { - pinned = map->size >> PAGE_SHIFT; - for (pfn = map->addr >> PAGE_SHIFT; + pinned = PFN_DOWN(map->size); + for (pfn = PFN_DOWN(map->addr); pinned > 0; pfn++, pinned--) { page = pfn_to_page(pfn); if (map->perm & VHOST_ACCESS_WO) set_page_dirty_lock(page); unpin_user_page(page); } - atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm); + atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm); + vhost_vdpa_general_unmap(v, map, asid); vhost_iotlb_map_free(iotlb, map); } } -static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v) +static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, + u64 start, u64 last, u32 asid) { - struct vhost_dev *dev = &v->vdev; + struct vhost_iotlb_map *map; + struct vdpa_map_file *map_file; - vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1); - kfree(dev->iotlb); - dev->iotlb = NULL; + while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) { + map_file = (struct vdpa_map_file *)map->opaque; + fput(map_file->file); + kfree(map_file); + vhost_vdpa_general_unmap(v, map, asid); + vhost_iotlb_map_free(iotlb, map); + } +} + +static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, u64 start, + u64 last, u32 asid) +{ + struct vdpa_device *vdpa = v->vdpa; + + if (vdpa->use_va) + return vhost_vdpa_va_unmap(v, iotlb, start, last, asid); + + return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid); } static int perm_to_iommu_flags(u32 perm) @@ -506,153 +987,319 @@ static int perm_to_iommu_flags(u32 perm) return flags | IOMMU_CACHE; } -static int vhost_vdpa_map(struct vhost_vdpa *v, - u64 iova, u64 size, u64 pa, u32 perm) +static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, + u64 iova, u64 size, u64 pa, u32 perm, void *opaque) { struct vhost_dev *dev = &v->vdev; struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + u32 asid = iotlb_to_asid(iotlb); int r = 0; - r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1, - pa, perm); + r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1, + pa, perm, opaque); if (r) return r; - if (ops->dma_map) - r = ops->dma_map(vdpa, iova, size, pa, perm); - else if (ops->set_map) - r = ops->set_map(vdpa, dev->iotlb); - else + if (ops->dma_map) { + r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque); + } else if (ops->set_map) { + if (!v->in_batch) + r = ops->set_map(vdpa, asid, iotlb); + } else { r = iommu_map(v->domain, iova, pa, size, - perm_to_iommu_flags(perm)); + perm_to_iommu_flags(perm), + 
GFP_KERNEL_ACCOUNT); + } + if (r) { + vhost_iotlb_del_range(iotlb, iova, iova + size - 1); + return r; + } - return r; + if (!vdpa->use_va) + atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm); + + return 0; } -static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) +static void vhost_vdpa_unmap(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, + u64 iova, u64 size) { - struct vhost_dev *dev = &v->vdev; struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + u32 asid = iotlb_to_asid(iotlb); - vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1); + vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid); + + if (ops->set_map) { + if (!v->in_batch) + ops->set_map(vdpa, asid, iotlb); + } - if (ops->dma_map) - ops->dma_unmap(vdpa, iova, size); - else if (ops->set_map) - ops->set_map(vdpa, dev->iotlb); - else - iommu_unmap(v->domain, iova, size); } -static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, - struct vhost_iotlb_msg *msg) +static int vhost_vdpa_va_map(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, + u64 iova, u64 size, u64 uaddr, u32 perm) +{ + struct vhost_dev *dev = &v->vdev; + u64 offset, map_size, map_iova = iova; + struct vdpa_map_file *map_file; + struct vm_area_struct *vma; + int ret = 0; + + mmap_read_lock(dev->mm); + + while (size) { + vma = find_vma(dev->mm, uaddr); + if (!vma) { + ret = -EINVAL; + break; + } + map_size = min(size, vma->vm_end - uaddr); + if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) && + !(vma->vm_flags & (VM_IO | VM_PFNMAP)))) + goto next; + + map_file = kzalloc(sizeof(*map_file), GFP_KERNEL); + if (!map_file) { + ret = -ENOMEM; + break; + } + offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start; + map_file->offset = offset; + map_file->file = get_file(vma->vm_file); + ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr, + perm, map_file); + if (ret) { + fput(map_file->file); + kfree(map_file); + break; + } +next: + size -= map_size; + uaddr += map_size; + map_iova += map_size; + } + if (ret) + vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova); + + mmap_read_unlock(dev->mm); + + return ret; +} + +static int vhost_vdpa_pa_map(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, + u64 iova, u64 size, u64 uaddr, u32 perm) { struct vhost_dev *dev = &v->vdev; - struct vhost_iotlb *iotlb = dev->iotlb; struct page **page_list; unsigned long list_size = PAGE_SIZE / sizeof(struct page *); unsigned int gup_flags = FOLL_LONGTERM; unsigned long npages, cur_base, map_pfn, last_pfn = 0; - unsigned long locked, lock_limit, pinned, i; - u64 iova = msg->iova; + unsigned long lock_limit, sz2pin, nchunks, i; + u64 start = iova; + long pinned; int ret = 0; - if (vhost_iotlb_itree_first(iotlb, msg->iova, - msg->iova + msg->size - 1)) - return -EEXIST; - + /* Limit the use of memory for bookkeeping */ page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) return -ENOMEM; - if (msg->perm & VHOST_ACCESS_WO) + if (perm & VHOST_ACCESS_WO) gup_flags |= FOLL_WRITE; - npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT; - if (!npages) - return -EINVAL; + npages = PFN_UP(size + (iova & ~PAGE_MASK)); + if (!npages) { + ret = -EINVAL; + goto free; + } mmap_read_lock(dev->mm); - locked = atomic64_add_return(npages, &dev->mm->pinned_vm); - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (locked > lock_limit) { + lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK)); + if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { ret = -ENOMEM; - goto out; + goto unlock; } 
- cur_base = msg->uaddr & PAGE_MASK; + cur_base = uaddr & PAGE_MASK; iova &= PAGE_MASK; + nchunks = 0; while (npages) { - pinned = min_t(unsigned long, npages, list_size); - ret = pin_user_pages(cur_base, pinned, - gup_flags, page_list, NULL); - if (ret != pinned) + sz2pin = min_t(unsigned long, npages, list_size); + pinned = pin_user_pages(cur_base, sz2pin, + gup_flags, page_list); + if (sz2pin != pinned) { + if (pinned < 0) { + ret = pinned; + } else { + unpin_user_pages(page_list, pinned); + ret = -ENOMEM; + } goto out; + } + nchunks++; if (!last_pfn) map_pfn = page_to_pfn(page_list[0]); - for (i = 0; i < ret; i++) { + for (i = 0; i < pinned; i++) { unsigned long this_pfn = page_to_pfn(page_list[i]); u64 csize; if (last_pfn && (this_pfn != last_pfn + 1)) { /* Pin a contiguous chunk of memory */ - csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; - if (vhost_vdpa_map(v, iova, csize, - map_pfn << PAGE_SHIFT, - msg->perm)) + csize = PFN_PHYS(last_pfn - map_pfn + 1); + ret = vhost_vdpa_map(v, iotlb, iova, csize, + PFN_PHYS(map_pfn), + perm, NULL); + if (ret) { + /* + * Unpin the pages that are left unmapped + * from this point on in the current + * page_list. The remaining outstanding + * ones which may stride across several + * chunks will be covered in the common + * error path subsequently. + */ + unpin_user_pages(&page_list[i], + pinned - i); goto out; + } + map_pfn = this_pfn; iova += csize; + nchunks = 0; } last_pfn = this_pfn; } - cur_base += ret << PAGE_SHIFT; - npages -= ret; + cur_base += PFN_PHYS(pinned); + npages -= pinned; } /* Pin the rest chunk */ - ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT, - map_pfn << PAGE_SHIFT, msg->perm); + ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1), + PFN_PHYS(map_pfn), perm, NULL); out: if (ret) { - vhost_vdpa_unmap(v, msg->iova, msg->size); - atomic64_sub(npages, &dev->mm->pinned_vm); + if (nchunks) { + unsigned long pfn; + + /* + * Unpin the outstanding pages which are yet to be + * mapped but haven't due to vdpa_map() or + * pin_user_pages() failure. + * + * Mapped pages are accounted in vdpa_map(), hence + * the corresponding unpinning will be handled by + * vdpa_unmap(). 
+ */ + WARN_ON(!last_pfn); + for (pfn = map_pfn; pfn <= last_pfn; pfn++) + unpin_user_page(pfn_to_page(pfn)); + } + vhost_vdpa_unmap(v, iotlb, start, size); } +unlock: mmap_read_unlock(dev->mm); +free: free_page((unsigned long)page_list); return ret; + } -static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, +static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, + struct vhost_iotlb_msg *msg) +{ + struct vdpa_device *vdpa = v->vdpa; + + if (msg->iova < v->range.first || !msg->size || + msg->iova > U64_MAX - msg->size + 1 || + msg->iova + msg->size - 1 > v->range.last) + return -EINVAL; + + if (vhost_iotlb_itree_first(iotlb, msg->iova, + msg->iova + msg->size - 1)) + return -EEXIST; + + if (vdpa->use_va) + return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size, + msg->uaddr, msg->perm); + + return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr, + msg->perm); +} + +static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid, struct vhost_iotlb_msg *msg) { struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev); + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + struct vhost_iotlb *iotlb = NULL; + struct vhost_vdpa_as *as = NULL; int r = 0; + mutex_lock(&dev->mutex); + r = vhost_dev_check_owner(dev); if (r) - return r; + goto unlock; + + if (msg->type == VHOST_IOTLB_UPDATE || + msg->type == VHOST_IOTLB_BATCH_BEGIN) { + as = vhost_vdpa_find_alloc_as(v, asid); + if (!as) { + dev_err(&v->dev, "can't find and alloc asid %d\n", + asid); + r = -EINVAL; + goto unlock; + } + iotlb = &as->iotlb; + } else + iotlb = asid_to_iotlb(v, asid); + + if ((v->in_batch && v->batch_asid != asid) || !iotlb) { + if (v->in_batch && v->batch_asid != asid) { + dev_info(&v->dev, "batch id %d asid %d\n", + v->batch_asid, asid); + } + if (!iotlb) + dev_err(&v->dev, "no iotlb for asid %d\n", asid); + r = -EINVAL; + goto unlock; + } switch (msg->type) { case VHOST_IOTLB_UPDATE: - r = vhost_vdpa_process_iotlb_update(v, msg); + r = vhost_vdpa_process_iotlb_update(v, iotlb, msg); break; case VHOST_IOTLB_INVALIDATE: - vhost_vdpa_unmap(v, msg->iova, msg->size); + vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size); + break; + case VHOST_IOTLB_BATCH_BEGIN: + v->batch_asid = asid; + v->in_batch = true; + break; + case VHOST_IOTLB_BATCH_END: + if (v->in_batch && ops->set_map) + ops->set_map(vdpa, asid, iotlb); + v->in_batch = false; break; default: r = -EINVAL; break; } +unlock: + mutex_unlock(&dev->mutex); return r; } @@ -671,24 +1318,26 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; - struct device *dma_dev = vdpa_get_dma_dev(vdpa); - struct bus_type *bus; + union virtio_map map = vdpa_get_map(vdpa); + struct device *dma_dev = map.dma_dev; int ret; /* Device want to do DMA by itself */ if (ops->set_map || ops->dma_map) return 0; - bus = dma_dev->bus; - if (!bus) - return -EFAULT; - - if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY)) + if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) { + dev_warn_once(&v->dev, + "Failed to allocate domain, device is not IOMMU cache coherent capable\n"); return -ENOTSUPP; + } - v->domain = iommu_domain_alloc(bus); - if (!v->domain) - return -EIO; + v->domain = iommu_paging_domain_alloc(dma_dev); + if (IS_ERR(v->domain)) { + ret = PTR_ERR(v->domain); + v->domain = NULL; + return ret; + } ret = iommu_attach_device(v->domain, dma_dev); if (ret) @@ -698,13 +1347,15 
@@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v) err_attach: iommu_domain_free(v->domain); + v->domain = NULL; return ret; } static void vhost_vdpa_free_domain(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; - struct device *dma_dev = vdpa_get_dma_dev(vdpa); + union virtio_map map = vdpa_get_map(vdpa); + struct device *dma_dev = map.dma_dev; if (v->domain) { iommu_detach_device(v->domain, dma_dev); @@ -714,12 +1365,47 @@ static void vhost_vdpa_free_domain(struct vhost_vdpa *v) v->domain = NULL; } +static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v) +{ + struct vdpa_iova_range *range = &v->range; + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (ops->get_iova_range) { + *range = ops->get_iova_range(vdpa); + } else if (v->domain && v->domain->geometry.force_aperture) { + range->first = v->domain->geometry.aperture_start; + range->last = v->domain->geometry.aperture_end; + } else { + range->first = 0; + range->last = ULLONG_MAX; + } +} + +static void vhost_vdpa_cleanup(struct vhost_vdpa *v) +{ + struct vhost_vdpa_as *as; + u32 asid; + + for (asid = 0; asid < v->vdpa->nas; asid++) { + as = asid_to_as(v, asid); + if (as) + vhost_vdpa_remove_as(v, asid); + } + + vhost_vdpa_free_domain(v); + vhost_dev_cleanup(&v->vdev); + kfree(v->vdev.vqs); + v->vdev.vqs = NULL; +} + static int vhost_vdpa_open(struct inode *inode, struct file *filep) { struct vhost_vdpa *v; struct vhost_dev *dev; struct vhost_virtqueue **vqs; - int nvqs, i, r, opened; + int r, opened; + u32 i, nvqs; v = container_of(inode->i_cdev, struct vhost_vdpa, cdev); @@ -728,7 +1414,9 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) return -EBUSY; nvqs = v->nvqs; - vhost_vdpa_reset(v); + r = vhost_vdpa_reset(v); + if (r) + goto err; vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL); if (!vqs) { @@ -740,31 +1428,36 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) for (i = 0; i < nvqs; i++) { vqs[i] = &v->vqs[i]; vqs[i]->handle_kick = handle_vq_kick; + vqs[i]->call_ctx.ctx = NULL; } vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false, vhost_vdpa_process_iotlb_msg); - dev->iotlb = vhost_iotlb_alloc(0, 0); - if (!dev->iotlb) { - r = -ENOMEM; - goto err_init_iotlb; - } - r = vhost_vdpa_alloc_domain(v); if (r) - goto err_init_iotlb; + goto err_alloc_domain; + + vhost_vdpa_set_iova_range(v); filep->private_data = v; return 0; -err_init_iotlb: - vhost_dev_cleanup(&v->vdev); +err_alloc_domain: + vhost_vdpa_cleanup(v); err: atomic_dec(&v->opened); return r; } +static void vhost_vdpa_clean_irq(struct vhost_vdpa *v) +{ + u32 i; + + for (i = 0; i < v->nvqs; i++) + vhost_vdpa_unsetup_vq_irq(v, i); +} + static int vhost_vdpa_release(struct inode *inode, struct file *filep) { struct vhost_vdpa *v = filep->private_data; @@ -772,13 +1465,12 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) mutex_lock(&d->mutex); filep->private_data = NULL; + vhost_vdpa_clean_irq(v); vhost_vdpa_reset(v); vhost_dev_stop(&v->vdev); - vhost_vdpa_iotlb_free(v); - vhost_vdpa_free_domain(v); + vhost_vdpa_unbind_mm(v); vhost_vdpa_config_put(v); - vhost_dev_cleanup(&v->vdev); - kfree(v->vdev.vqs); + vhost_vdpa_cleanup(v); mutex_unlock(&d->mutex); atomic_dec(&v->opened); @@ -799,13 +1491,7 @@ static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf) notify = ops->get_vq_notification(vdpa, index); - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (remap_pfn_range(vma, vmf->address & PAGE_MASK, - notify.addr >> PAGE_SHIFT, 
PAGE_SIZE, - vma->vm_page_prot)) - return VM_FAULT_SIGBUS; - - return VM_FAULT_NOPAGE; + return vmf_insert_pfn(vma, vmf->address & PAGE_MASK, PFN_DOWN(notify.addr)); } static const struct vm_operations_struct vhost_vdpa_vm_ops = { @@ -818,7 +1504,7 @@ static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma) struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; struct vdpa_notification_area notify; - int index = vma->vm_pgoff; + unsigned long index = vma->vm_pgoff; if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; @@ -841,6 +1527,7 @@ static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma) if (vma->vm_end - vma->vm_start != notify.size) return -ENOTSUPP; + vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); vma->vm_ops = &vhost_vdpa_vm_ops; return 0; } @@ -863,7 +1550,7 @@ static void vhost_vdpa_release_dev(struct device *device) struct vhost_vdpa *v = container_of(device, struct vhost_vdpa, dev); - ida_simple_remove(&vhost_vdpa_ida, v->minor); + ida_free(&vhost_vdpa_ida, v->minor); kfree(v->vqs); kfree(v); } @@ -872,19 +1559,22 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) { const struct vdpa_config_ops *ops = vdpa->config; struct vhost_vdpa *v; - int minor, nvqs = VHOST_VDPA_VQ_MAX; - int r; + int minor; + int i, r; - /* Currently, we only accept the network devices. */ - if (ops->get_device_id(vdpa) != VIRTIO_ID_NET) - return -ENOTSUPP; + /* We can't support platform IOMMU device with more than 1 + * group or as + */ + if (!ops->set_map && !ops->dma_map && + (vdpa->ngroups > 1 || vdpa->nas > 1)) + return -EOPNOTSUPP; v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!v) return -ENOMEM; - minor = ida_simple_get(&vhost_vdpa_ida, 0, - VHOST_VDPA_DEV_MAX, GFP_KERNEL); + minor = ida_alloc_max(&vhost_vdpa_ida, VHOST_VDPA_DEV_MAX - 1, + GFP_KERNEL); if (minor < 0) { kfree(v); return minor; @@ -893,14 +1583,14 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) atomic_set(&v->opened, 0); v->minor = minor; v->vdpa = vdpa; - v->nvqs = nvqs; + v->nvqs = vdpa->nvqs; v->virtio_id = ops->get_device_id(vdpa); device_initialize(&v->dev); v->dev.release = vhost_vdpa_release_dev; v->dev.parent = &vdpa->dev; v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor); - v->vqs = kmalloc_array(nvqs, sizeof(struct vhost_virtqueue), + v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue), GFP_KERNEL); if (!v->vqs) { r = -ENOMEM; @@ -921,6 +1611,9 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) init_completion(&v->completion); vdpa_set_drvdata(vdpa, v); + for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++) + INIT_HLIST_HEAD(&v->as[i]); + return 0; err: |
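
The hunks above rework vhost_vdpa_process_iotlb_msg() to take an address space id (ASID) and to honor VHOST_IOTLB_BATCH_BEGIN/END, coalescing updates into a single ops->set_map() call. As an illustration only (not part of the patch), the sketch below shows one way userspace might populate a single mapping in address space 0 using the v2 message format. It assumes "fd" is an already-opened /dev/vhost-vdpa-N on which the IOTLB_MSG_V2, IOTLB_BATCH and IOTLB_ASID backend features were negotiated; vdpa_map_one() is a made-up helper name.

/*
 * Illustrative userspace sketch, not from the patch: map one IOVA range
 * in ASID 0 via the v2 IOTLB message format handled by
 * vhost_vdpa_process_iotlb_msg() above.
 */
#include <linux/vhost.h>
#include <linux/vhost_types.h>
#include <string.h>
#include <unistd.h>

static int vdpa_map_one(int fd, __u64 iova, __u64 size, __u64 uaddr)
{
	struct vhost_msg_v2 msg;

	memset(&msg, 0, sizeof(msg));
	msg.type = VHOST_IOTLB_MSG_V2;
	msg.asid = 0;				/* address space to update */
	msg.iotlb.type = VHOST_IOTLB_BATCH_BEGIN;
	if (write(fd, &msg, sizeof(msg)) != sizeof(msg))
		return -1;

	msg.iotlb.type = VHOST_IOTLB_UPDATE;
	msg.iotlb.iova = iova;
	msg.iotlb.size = size;
	msg.iotlb.uaddr = uaddr;
	msg.iotlb.perm = VHOST_ACCESS_RW;
	if (write(fd, &msg, sizeof(msg)) != sizeof(msg))
		return -1;

	/* BATCH_END lets set_map()-style parents commit everything at once */
	memset(&msg.iotlb, 0, sizeof(msg.iotlb));
	msg.iotlb.type = VHOST_IOTLB_BATCH_END;
	if (write(fd, &msg, sizeof(msg)) != sizeof(msg))
		return -1;

	return 0;
}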
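The suspend/resume and IOVA-range ioctls wired up in vhost_vdpa_unlocked_ioctl() above can be exercised from userspace roughly as follows. Again a hedged sketch under assumptions, not part of the patch: "fd" is an open vhost-vdpa device, the parent driver advertises VHOST_BACKEND_F_SUSPEND, and vdpa_quiesce() is a hypothetical helper.

/*
 * Illustrative userspace sketch, not from the patch: check backend
 * features, read the usable IOVA range, then suspend and resume the
 * device through the new ioctls.
 */
#include <linux/vhost.h>
#include <linux/vhost_types.h>
#include <sys/ioctl.h>
#include <stdio.h>

static int vdpa_quiesce(int fd)
{
	struct vhost_vdpa_iova_range range;
	__u64 features;

	if (ioctl(fd, VHOST_GET_BACKEND_FEATURES, &features))
		return -1;
	if (!(features & (1ULL << VHOST_BACKEND_F_SUSPEND)))
		return -1;	/* parent driver has no ->suspend() */

	if (ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, &range))
		return -1;
	printf("usable IOVA range: [%llu, %llu]\n",
	       (unsigned long long)range.first,
	       (unsigned long long)range.last);

	/* device stops processing descriptors until VHOST_VDPA_RESUME */
	if (ioctl(fd, VHOST_VDPA_SUSPEND))
		return -1;

	return ioctl(fd, VHOST_VDPA_RESUME);
}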
