diff options
Diffstat (limited to 'drivers/vhost/vdpa.c')
| -rw-r--r-- | drivers/vhost/vdpa.c | 669 |
1 files changed, 561 insertions, 108 deletions
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 851539807bc9..05a481e4c385 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -28,32 +28,141 @@ enum { VHOST_VDPA_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) | - (1ULL << VHOST_BACKEND_F_IOTLB_BATCH), + (1ULL << VHOST_BACKEND_F_IOTLB_BATCH) | + (1ULL << VHOST_BACKEND_F_IOTLB_ASID), }; #define VHOST_VDPA_DEV_MAX (1U << MINORBITS) +#define VHOST_VDPA_IOTLB_BUCKETS 16 + +struct vhost_vdpa_as { + struct hlist_node hash_link; + struct vhost_iotlb iotlb; + u32 id; +}; + struct vhost_vdpa { struct vhost_dev vdev; struct iommu_domain *domain; struct vhost_virtqueue *vqs; struct completion completion; struct vdpa_device *vdpa; + struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS]; struct device dev; struct cdev cdev; atomic_t opened; - int nvqs; + u32 nvqs; int virtio_id; int minor; struct eventfd_ctx *config_ctx; int in_batch; struct vdpa_iova_range range; + u32 batch_asid; + bool suspended; }; static DEFINE_IDA(vhost_vdpa_ida); static dev_t vhost_vdpa_major; +static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, u64 start, + u64 last, u32 asid); + +static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb) +{ + struct vhost_vdpa_as *as = container_of(iotlb, struct + vhost_vdpa_as, iotlb); + return as->id; +} + +static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid) +{ + struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS]; + struct vhost_vdpa_as *as; + + hlist_for_each_entry(as, head, hash_link) + if (as->id == asid) + return as; + + return NULL; +} + +static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid) +{ + struct vhost_vdpa_as *as = asid_to_as(v, asid); + + if (!as) + return NULL; + + return &as->iotlb; +} + +static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid) +{ + struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS]; + struct vhost_vdpa_as *as; + + if (asid_to_as(v, asid)) + return NULL; + + if (asid >= v->vdpa->nas) + return NULL; + + as = kmalloc(sizeof(*as), GFP_KERNEL); + if (!as) + return NULL; + + vhost_iotlb_init(&as->iotlb, 0, 0); + as->id = asid; + hlist_add_head(&as->hash_link, head); + + return as; +} + +static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v, + u32 asid) +{ + struct vhost_vdpa_as *as = asid_to_as(v, asid); + + if (as) + return as; + + return vhost_vdpa_alloc_as(v, asid); +} + +static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (ops->reset_map) + ops->reset_map(vdpa, asid); +} + +static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) +{ + struct vhost_vdpa_as *as = asid_to_as(v, asid); + + if (!as) + return -EINVAL; + + hlist_del(&as->hash_link); + vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid); + /* + * Devices with vendor specific IOMMU may need to restore + * iotlb to the initial or default state, which cannot be + * cleaned up in the all range unmap call above. Give them + * a chance to clean up or reset the map to the desired + * state. + */ + vhost_vdpa_reset_map(v, asid); + kfree(as); + + return 0; +} + static void handle_vq_kick(struct vhost_work *work) { struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, @@ -70,7 +179,7 @@ static irqreturn_t vhost_vdpa_virtqueue_cb(void *private) struct eventfd_ctx *call_ctx = vq->call_ctx.ctx; if (call_ctx) - eventfd_signal(call_ctx, 1); + eventfd_signal(call_ctx); return IRQ_HANDLED; } @@ -81,7 +190,7 @@ static irqreturn_t vhost_vdpa_config_cb(void *private) struct eventfd_ctx *config_ctx = v->config_ctx; if (config_ctx) - eventfd_signal(config_ctx, 1); + eventfd_signal(config_ctx); return IRQ_HANDLED; } @@ -97,16 +206,17 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid) return; irq = ops->get_vq_irq(vdpa, qid); - irq_bypass_unregister_producer(&vq->call_ctx.producer); - if (!vq->call_ctx.ctx || irq < 0) + if (irq < 0) return; - vq->call_ctx.producer.token = vq->call_ctx.ctx; - vq->call_ctx.producer.irq = irq; - ret = irq_bypass_register_producer(&vq->call_ctx.producer); + if (!vq->call_ctx.ctx) + return; + + ret = irq_bypass_register_producer(&vq->call_ctx.producer, + vq->call_ctx.ctx, irq); if (unlikely(ret)) - dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n", - qid, vq->call_ctx.producer.token, ret); + dev_info(&v->dev, "vq %u, irq bypass producer (eventfd %p) registration fails, ret = %d\n", + qid, vq->call_ctx.ctx, ret); } static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid) @@ -116,13 +226,48 @@ static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid) irq_bypass_unregister_producer(&vq->call_ctx.producer); } -static int vhost_vdpa_reset(struct vhost_vdpa *v) +static int _compat_vdpa_reset(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; + u32 flags = 0; + + v->suspended = false; + + if (v->vdev.vqs) { + flags |= !vhost_backend_has_feature(v->vdev.vqs[0], + VHOST_BACKEND_F_IOTLB_PERSIST) ? + VDPA_RESET_F_CLEAN_MAP : 0; + } + + return vdpa_reset(vdpa, flags); +} +static int vhost_vdpa_reset(struct vhost_vdpa *v) +{ v->in_batch = 0; + return _compat_vdpa_reset(v); +} - return vdpa_reset(vdpa); +static long vhost_vdpa_bind_mm(struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (!vdpa->use_va || !ops->bind_mm) + return 0; + + return ops->bind_mm(vdpa, v->vdev.mm); +} + +static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (!vdpa->use_va || !ops->unbind_mm) + return; + + ops->unbind_mm(vdpa); } static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp) @@ -158,7 +303,8 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; u8 status, status_old; - int ret, nvqs = v->nvqs; + u32 nvqs = v->nvqs; + int ret; u16 i; if (copy_from_user(&status, statusp, sizeof(status))) @@ -178,7 +324,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) vhost_vdpa_unsetup_vq_irq(v, i); if (status == 0) { - ret = vdpa_reset(vdpa); + ret = _compat_vdpa_reset(v); if (ret) return ret; } else @@ -256,6 +402,30 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, return 0; } +static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + return ops->suspend; +} + +static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + return ops->resume; +} + +static bool vhost_vdpa_has_desc_group(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + return ops->get_vq_desc_group; +} + static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) { struct vdpa_device *vdpa = v->vdpa; @@ -270,11 +440,34 @@ static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) return 0; } +static u64 vhost_vdpa_get_backend_features(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (!ops->get_backend_features) + return 0; + else + return ops->get_backend_features(vdpa); +} + +static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + return (!ops->set_map && !ops->dma_map) || ops->reset_map || + vhost_vdpa_get_backend_features(v) & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST); +} + static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + struct vhost_dev *d = &v->vdev; + u64 actual_features; u64 features; + int i; /* * It's not allowed to change the features after they have @@ -286,9 +479,19 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) if (copy_from_user(&features, featurep, sizeof(features))) return -EFAULT; - if (vdpa_set_features(vdpa, features, false)) + if (vdpa_set_features(vdpa, features)) return -EINVAL; + /* let the vqs know what has been configured */ + actual_features = ops->get_driver_features(vdpa); + for (i = 0; i < d->nvqs; ++i) { + struct vhost_virtqueue *vq = d->vqs[i]; + + mutex_lock(&vq->mutex); + vq->acked_features = actual_features; + mutex_unlock(&vq->mutex); + } + return 0; } @@ -355,6 +558,77 @@ static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp) return 0; } +static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + u32 size; + + size = ops->get_config_size(vdpa); + + if (copy_to_user(argp, &size, sizeof(size))) + return -EFAULT; + + return 0; +} + +static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp) +{ + struct vdpa_device *vdpa = v->vdpa; + + if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs))) + return -EFAULT; + + return 0; +} + +/* After a successful return of ioctl the device must not process more + * virtqueue descriptors. The device can answer to read or writes of config + * fields as if it were not suspended. In particular, writing to "queue_enable" + * with a value of 1 will not make the device start processing buffers. + */ +static long vhost_vdpa_suspend(struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + int ret; + + if (!(ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK)) + return 0; + + if (!ops->suspend) + return -EOPNOTSUPP; + + ret = ops->suspend(vdpa); + if (!ret) + v->suspended = true; + + return ret; +} + +/* After a successful return of this ioctl the device resumes processing + * virtqueue descriptors. The device becomes fully operational the same way it + * was before it was suspended. + */ +static long vhost_vdpa_resume(struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + int ret; + + if (!(ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK)) + return 0; + + if (!ops->resume) + return -EOPNOTSUPP; + + ret = ops->resume(vdpa); + if (!ret) + v->suspended = false; + + return ret; +} + static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, void __user *argp) { @@ -383,12 +657,62 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, return -EFAULT; ops->set_vq_ready(vdpa, idx, s.num); return 0; + case VHOST_VDPA_GET_VRING_GROUP: + if (!ops->get_vq_group) + return -EOPNOTSUPP; + s.index = idx; + s.num = ops->get_vq_group(vdpa, idx); + if (s.num >= vdpa->ngroups) + return -EIO; + else if (copy_to_user(argp, &s, sizeof(s))) + return -EFAULT; + return 0; + case VHOST_VDPA_GET_VRING_DESC_GROUP: + if (!vhost_vdpa_has_desc_group(v)) + return -EOPNOTSUPP; + s.index = idx; + s.num = ops->get_vq_desc_group(vdpa, idx); + if (s.num >= vdpa->ngroups) + return -EIO; + else if (copy_to_user(argp, &s, sizeof(s))) + return -EFAULT; + return 0; + case VHOST_VDPA_SET_GROUP_ASID: + if (copy_from_user(&s, argp, sizeof(s))) + return -EFAULT; + if (s.num >= vdpa->nas) + return -EINVAL; + if (!ops->set_group_asid) + return -EOPNOTSUPP; + return ops->set_group_asid(vdpa, idx, s.num); + case VHOST_VDPA_GET_VRING_SIZE: + if (!ops->get_vq_size) + return -EOPNOTSUPP; + s.index = idx; + s.num = ops->get_vq_size(vdpa, idx); + if (copy_to_user(argp, &s, sizeof(s))) + return -EFAULT; + return 0; case VHOST_GET_VRING_BASE: r = ops->get_vq_state(v->vdpa, idx, &vq_state); if (r) return r; - vq->last_avail_idx = vq_state.split.avail_index; + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { + vq->last_avail_idx = vq_state.packed.last_avail_idx | + (vq_state.packed.last_avail_counter << 15); + vq->last_used_idx = vq_state.packed.last_used_idx | + (vq_state.packed.last_used_counter << 15); + } else { + vq->last_avail_idx = vq_state.split.avail_index; + } + break; + case VHOST_SET_VRING_CALL: + if (vq->call_ctx.ctx) { + if (ops->get_status(vdpa) & + VIRTIO_CONFIG_S_DRIVER_OK) + vhost_vdpa_unsetup_vq_irq(v, idx); + } break; } @@ -398,6 +722,9 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, switch (cmd) { case VHOST_SET_VRING_ADDR: + if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended) + return -EINVAL; + if (ops->set_vq_address(vdpa, idx, (u64)(uintptr_t)vq->desc, (u64)(uintptr_t)vq->avail, @@ -406,21 +733,34 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, break; case VHOST_SET_VRING_BASE: - vq_state.split.avail_index = vq->last_avail_idx; - if (ops->set_vq_state(vdpa, idx, &vq_state)) - r = -EINVAL; + if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended) + return -EINVAL; + + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { + vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff; + vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000); + vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff; + vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000); + } else { + vq_state.split.avail_index = vq->last_avail_idx; + } + r = ops->set_vq_state(vdpa, idx, &vq_state); break; case VHOST_SET_VRING_CALL: if (vq->call_ctx.ctx) { cb.callback = vhost_vdpa_virtqueue_cb; cb.private = vq; + cb.trigger = vq->call_ctx.ctx; + if (ops->get_status(vdpa) & + VIRTIO_CONFIG_S_DRIVER_OK) + vhost_vdpa_setup_vq_irq(v, idx); } else { cb.callback = NULL; cb.private = NULL; + cb.trigger = NULL; } ops->set_vq_cb(vdpa, idx, &cb); - vhost_vdpa_setup_vq_irq(v, idx); break; case VHOST_SET_VRING_NUM: @@ -444,7 +784,27 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, if (cmd == VHOST_SET_BACKEND_FEATURES) { if (copy_from_user(&features, featurep, sizeof(features))) return -EFAULT; - if (features & ~VHOST_VDPA_BACKEND_FEATURES) + if (features & ~(VHOST_VDPA_BACKEND_FEATURES | + BIT_ULL(VHOST_BACKEND_F_DESC_ASID) | + BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST) | + BIT_ULL(VHOST_BACKEND_F_SUSPEND) | + BIT_ULL(VHOST_BACKEND_F_RESUME) | + BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK))) + return -EOPNOTSUPP; + if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) && + !vhost_vdpa_can_suspend(v)) + return -EOPNOTSUPP; + if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) && + !vhost_vdpa_can_resume(v)) + return -EOPNOTSUPP; + if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) && + !(features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) + return -EINVAL; + if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) && + !vhost_vdpa_has_desc_group(v)) + return -EOPNOTSUPP; + if ((features & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST)) && + !vhost_vdpa_has_persistent_map(v)) return -EOPNOTSUPP; vhost_set_backend_features(&v->vdev, features); return 0; @@ -477,6 +837,15 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, case VHOST_VDPA_GET_VRING_NUM: r = vhost_vdpa_get_vring_num(v, argp); break; + case VHOST_VDPA_GET_GROUP_NUM: + if (copy_to_user(argp, &v->vdpa->ngroups, + sizeof(v->vdpa->ngroups))) + r = -EFAULT; + break; + case VHOST_VDPA_GET_AS_NUM: + if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas))) + r = -EFAULT; + break; case VHOST_SET_LOG_BASE: case VHOST_SET_LOG_FD: r = -ENOIOCTLCMD; @@ -486,12 +855,33 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, break; case VHOST_GET_BACKEND_FEATURES: features = VHOST_VDPA_BACKEND_FEATURES; + if (vhost_vdpa_can_suspend(v)) + features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND); + if (vhost_vdpa_can_resume(v)) + features |= BIT_ULL(VHOST_BACKEND_F_RESUME); + if (vhost_vdpa_has_desc_group(v)) + features |= BIT_ULL(VHOST_BACKEND_F_DESC_ASID); + if (vhost_vdpa_has_persistent_map(v)) + features |= BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST); + features |= vhost_vdpa_get_backend_features(v); if (copy_to_user(featurep, &features, sizeof(features))) r = -EFAULT; break; case VHOST_VDPA_GET_IOVA_RANGE: r = vhost_vdpa_get_iova_range(v, argp); break; + case VHOST_VDPA_GET_CONFIG_SIZE: + r = vhost_vdpa_get_config_size(v, argp); + break; + case VHOST_VDPA_GET_VQS_COUNT: + r = vhost_vdpa_get_vqs_count(v, argp); + break; + case VHOST_VDPA_SUSPEND: + r = vhost_vdpa_suspend(v); + break; + case VHOST_VDPA_RESUME: + r = vhost_vdpa_resume(v); + break; default: r = vhost_dev_ioctl(&v->vdev, cmd, argp); if (r == -ENOIOCTLCMD) @@ -499,14 +889,36 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, break; } + if (r) + goto out; + + switch (cmd) { + case VHOST_SET_OWNER: + r = vhost_vdpa_bind_mm(v); + if (r) + vhost_dev_reset_owner(d, NULL); + break; + } +out: mutex_unlock(&d->mutex); return r; } +static void vhost_vdpa_general_unmap(struct vhost_vdpa *v, + struct vhost_iotlb_map *map, u32 asid) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + if (ops->dma_map) { + ops->dma_unmap(vdpa, asid, map->start, map->size); + } else if (ops->set_map == NULL) { + iommu_unmap(v->domain, map->start, map->size); + } +} -static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last) +static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, + u64 start, u64 last, u32 asid) { struct vhost_dev *dev = &v->vdev; - struct vhost_iotlb *iotlb = dev->iotlb; struct vhost_iotlb_map *map; struct page *page; unsigned long pfn, pinned; @@ -521,14 +933,14 @@ static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last) unpin_user_page(page); } atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm); + vhost_vdpa_general_unmap(v, map, asid); vhost_iotlb_map_free(iotlb, map); } } -static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, u64 start, u64 last) +static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, + u64 start, u64 last, u32 asid) { - struct vhost_dev *dev = &v->vdev; - struct vhost_iotlb *iotlb = dev->iotlb; struct vhost_iotlb_map *map; struct vdpa_map_file *map_file; @@ -536,27 +948,21 @@ static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, u64 start, u64 last) map_file = (struct vdpa_map_file *)map->opaque; fput(map_file->file); kfree(map_file); + vhost_vdpa_general_unmap(v, map, asid); vhost_iotlb_map_free(iotlb, map); } } -static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) +static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, u64 start, + u64 last, u32 asid) { struct vdpa_device *vdpa = v->vdpa; if (vdpa->use_va) - return vhost_vdpa_va_unmap(v, start, last); + return vhost_vdpa_va_unmap(v, iotlb, start, last, asid); - return vhost_vdpa_pa_unmap(v, start, last); -} - -static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v) -{ - struct vhost_dev *dev = &v->vdev; - - vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1); - kfree(dev->iotlb); - dev->iotlb = NULL; + return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid); } static int perm_to_iommu_flags(u32 perm) @@ -581,30 +987,32 @@ static int perm_to_iommu_flags(u32 perm) return flags | IOMMU_CACHE; } -static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova, - u64 size, u64 pa, u32 perm, void *opaque) +static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, + u64 iova, u64 size, u64 pa, u32 perm, void *opaque) { struct vhost_dev *dev = &v->vdev; struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + u32 asid = iotlb_to_asid(iotlb); int r = 0; - r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1, + r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1, pa, perm, opaque); if (r) return r; if (ops->dma_map) { - r = ops->dma_map(vdpa, iova, size, pa, perm, opaque); + r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque); } else if (ops->set_map) { if (!v->in_batch) - r = ops->set_map(vdpa, dev->iotlb); + r = ops->set_map(vdpa, asid, iotlb); } else { r = iommu_map(v->domain, iova, pa, size, - perm_to_iommu_flags(perm)); + perm_to_iommu_flags(perm), + GFP_KERNEL_ACCOUNT); } if (r) { - vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); + vhost_iotlb_del_range(iotlb, iova, iova + size - 1); return r; } @@ -614,25 +1022,25 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova, return 0; } -static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) +static void vhost_vdpa_unmap(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, + u64 iova, u64 size) { - struct vhost_dev *dev = &v->vdev; struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + u32 asid = iotlb_to_asid(iotlb); - vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1); + vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid); - if (ops->dma_map) { - ops->dma_unmap(vdpa, iova, size); - } else if (ops->set_map) { + if (ops->set_map) { if (!v->in_batch) - ops->set_map(vdpa, dev->iotlb); - } else { - iommu_unmap(v->domain, iova, size); + ops->set_map(vdpa, asid, iotlb); } + } static int vhost_vdpa_va_map(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, u64 iova, u64 size, u64 uaddr, u32 perm) { struct vhost_dev *dev = &v->vdev; @@ -662,7 +1070,7 @@ static int vhost_vdpa_va_map(struct vhost_vdpa *v, offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start; map_file->offset = offset; map_file->file = get_file(vma->vm_file); - ret = vhost_vdpa_map(v, map_iova, map_size, uaddr, + ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr, perm, map_file); if (ret) { fput(map_file->file); @@ -675,7 +1083,7 @@ next: map_iova += map_size; } if (ret) - vhost_vdpa_unmap(v, iova, map_iova - iova); + vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova); mmap_read_unlock(dev->mm); @@ -683,6 +1091,7 @@ next: } static int vhost_vdpa_pa_map(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, u64 iova, u64 size, u64 uaddr, u32 perm) { struct vhost_dev *dev = &v->vdev; @@ -724,7 +1133,7 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v, while (npages) { sz2pin = min_t(unsigned long, npages, list_size); pinned = pin_user_pages(cur_base, sz2pin, - gup_flags, page_list, NULL); + gup_flags, page_list); if (sz2pin != pinned) { if (pinned < 0) { ret = pinned; @@ -746,7 +1155,7 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v, if (last_pfn && (this_pfn != last_pfn + 1)) { /* Pin a contiguous chunk of memory */ csize = PFN_PHYS(last_pfn - map_pfn + 1); - ret = vhost_vdpa_map(v, iova, csize, + ret = vhost_vdpa_map(v, iotlb, iova, csize, PFN_PHYS(map_pfn), perm, NULL); if (ret) { @@ -776,7 +1185,7 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v, } /* Pin the rest chunk */ - ret = vhost_vdpa_map(v, iova, PFN_PHYS(last_pfn - map_pfn + 1), + ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1), PFN_PHYS(map_pfn), perm, NULL); out: if (ret) { @@ -796,7 +1205,7 @@ out: for (pfn = map_pfn; pfn <= last_pfn; pfn++) unpin_user_page(pfn_to_page(pfn)); } - vhost_vdpa_unmap(v, start, size); + vhost_vdpa_unmap(v, iotlb, start, size); } unlock: mmap_read_unlock(dev->mm); @@ -807,11 +1216,10 @@ free: } static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, struct vhost_iotlb_msg *msg) { - struct vhost_dev *dev = &v->vdev; struct vdpa_device *vdpa = v->vdpa; - struct vhost_iotlb *iotlb = dev->iotlb; if (msg->iova < v->range.first || !msg->size || msg->iova > U64_MAX - msg->size + 1 || @@ -823,19 +1231,21 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, return -EEXIST; if (vdpa->use_va) - return vhost_vdpa_va_map(v, msg->iova, msg->size, + return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size, msg->uaddr, msg->perm); - return vhost_vdpa_pa_map(v, msg->iova, msg->size, msg->uaddr, + return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr, msg->perm); } -static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, +static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid, struct vhost_iotlb_msg *msg) { struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev); struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + struct vhost_iotlb *iotlb = NULL; + struct vhost_vdpa_as *as = NULL; int r = 0; mutex_lock(&dev->mutex); @@ -844,19 +1254,44 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, if (r) goto unlock; + if (msg->type == VHOST_IOTLB_UPDATE || + msg->type == VHOST_IOTLB_BATCH_BEGIN) { + as = vhost_vdpa_find_alloc_as(v, asid); + if (!as) { + dev_err(&v->dev, "can't find and alloc asid %d\n", + asid); + r = -EINVAL; + goto unlock; + } + iotlb = &as->iotlb; + } else + iotlb = asid_to_iotlb(v, asid); + + if ((v->in_batch && v->batch_asid != asid) || !iotlb) { + if (v->in_batch && v->batch_asid != asid) { + dev_info(&v->dev, "batch id %d asid %d\n", + v->batch_asid, asid); + } + if (!iotlb) + dev_err(&v->dev, "no iotlb for asid %d\n", asid); + r = -EINVAL; + goto unlock; + } + switch (msg->type) { case VHOST_IOTLB_UPDATE: - r = vhost_vdpa_process_iotlb_update(v, msg); + r = vhost_vdpa_process_iotlb_update(v, iotlb, msg); break; case VHOST_IOTLB_INVALIDATE: - vhost_vdpa_unmap(v, msg->iova, msg->size); + vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size); break; case VHOST_IOTLB_BATCH_BEGIN: + v->batch_asid = asid; v->in_batch = true; break; case VHOST_IOTLB_BATCH_END: if (v->in_batch && ops->set_map) - ops->set_map(vdpa, dev->iotlb); + ops->set_map(vdpa, asid, iotlb); v->in_batch = false; break; default: @@ -883,24 +1318,26 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; - struct device *dma_dev = vdpa_get_dma_dev(vdpa); - struct bus_type *bus; + union virtio_map map = vdpa_get_map(vdpa); + struct device *dma_dev = map.dma_dev; int ret; /* Device want to do DMA by itself */ if (ops->set_map || ops->dma_map) return 0; - bus = dma_dev->bus; - if (!bus) - return -EFAULT; - - if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY)) + if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) { + dev_warn_once(&v->dev, + "Failed to allocate domain, device is not IOMMU cache coherent capable\n"); return -ENOTSUPP; + } - v->domain = iommu_domain_alloc(bus); - if (!v->domain) - return -EIO; + v->domain = iommu_paging_domain_alloc(dma_dev); + if (IS_ERR(v->domain)) { + ret = PTR_ERR(v->domain); + v->domain = NULL; + return ret; + } ret = iommu_attach_device(v->domain, dma_dev); if (ret) @@ -910,13 +1347,15 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v) err_attach: iommu_domain_free(v->domain); + v->domain = NULL; return ret; } static void vhost_vdpa_free_domain(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; - struct device *dma_dev = vdpa_get_dma_dev(vdpa); + union virtio_map map = vdpa_get_map(vdpa); + struct device *dma_dev = map.dma_dev; if (v->domain) { iommu_detach_device(v->domain, dma_dev); @@ -943,12 +1382,30 @@ static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v) } } +static void vhost_vdpa_cleanup(struct vhost_vdpa *v) +{ + struct vhost_vdpa_as *as; + u32 asid; + + for (asid = 0; asid < v->vdpa->nas; asid++) { + as = asid_to_as(v, asid); + if (as) + vhost_vdpa_remove_as(v, asid); + } + + vhost_vdpa_free_domain(v); + vhost_dev_cleanup(&v->vdev); + kfree(v->vdev.vqs); + v->vdev.vqs = NULL; +} + static int vhost_vdpa_open(struct inode *inode, struct file *filep) { struct vhost_vdpa *v; struct vhost_dev *dev; struct vhost_virtqueue **vqs; - int nvqs, i, r, opened; + int r, opened; + u32 i, nvqs; v = container_of(inode->i_cdev, struct vhost_vdpa, cdev); @@ -971,19 +1428,14 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) for (i = 0; i < nvqs; i++) { vqs[i] = &v->vqs[i]; vqs[i]->handle_kick = handle_vq_kick; + vqs[i]->call_ctx.ctx = NULL; } vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false, vhost_vdpa_process_iotlb_msg); - dev->iotlb = vhost_iotlb_alloc(0, 0); - if (!dev->iotlb) { - r = -ENOMEM; - goto err_init_iotlb; - } - r = vhost_vdpa_alloc_domain(v); if (r) - goto err_init_iotlb; + goto err_alloc_domain; vhost_vdpa_set_iova_range(v); @@ -991,9 +1443,8 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) return 0; -err_init_iotlb: - vhost_dev_cleanup(&v->vdev); - kfree(vqs); +err_alloc_domain: + vhost_vdpa_cleanup(v); err: atomic_dec(&v->opened); return r; @@ -1001,7 +1452,7 @@ err: static void vhost_vdpa_clean_irq(struct vhost_vdpa *v) { - int i; + u32 i; for (i = 0; i < v->nvqs; i++) vhost_vdpa_unsetup_vq_irq(v, i); @@ -1017,11 +1468,9 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) vhost_vdpa_clean_irq(v); vhost_vdpa_reset(v); vhost_dev_stop(&v->vdev); - vhost_vdpa_iotlb_free(v); - vhost_vdpa_free_domain(v); + vhost_vdpa_unbind_mm(v); vhost_vdpa_config_put(v); - vhost_dev_cleanup(&v->vdev); - kfree(v->vdev.vqs); + vhost_vdpa_cleanup(v); mutex_unlock(&d->mutex); atomic_dec(&v->opened); @@ -1042,13 +1491,7 @@ static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf) notify = ops->get_vq_notification(vdpa, index); - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (remap_pfn_range(vma, vmf->address & PAGE_MASK, - PFN_DOWN(notify.addr), PAGE_SIZE, - vma->vm_page_prot)) - return VM_FAULT_SIGBUS; - - return VM_FAULT_NOPAGE; + return vmf_insert_pfn(vma, vmf->address & PAGE_MASK, PFN_DOWN(notify.addr)); } static const struct vm_operations_struct vhost_vdpa_vm_ops = { @@ -1084,7 +1527,7 @@ static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma) if (vma->vm_end - vma->vm_start != notify.size) return -ENOTSUPP; - vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; + vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); vma->vm_ops = &vhost_vdpa_vm_ops; return 0; } @@ -1107,7 +1550,7 @@ static void vhost_vdpa_release_dev(struct device *device) struct vhost_vdpa *v = container_of(device, struct vhost_vdpa, dev); - ida_simple_remove(&vhost_vdpa_ida, v->minor); + ida_free(&vhost_vdpa_ida, v->minor); kfree(v->vqs); kfree(v); } @@ -1117,14 +1560,21 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) const struct vdpa_config_ops *ops = vdpa->config; struct vhost_vdpa *v; int minor; - int r; + int i, r; + + /* We can't support platform IOMMU device with more than 1 + * group or as + */ + if (!ops->set_map && !ops->dma_map && + (vdpa->ngroups > 1 || vdpa->nas > 1)) + return -EOPNOTSUPP; v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!v) return -ENOMEM; - minor = ida_simple_get(&vhost_vdpa_ida, 0, - VHOST_VDPA_DEV_MAX, GFP_KERNEL); + minor = ida_alloc_max(&vhost_vdpa_ida, VHOST_VDPA_DEV_MAX - 1, + GFP_KERNEL); if (minor < 0) { kfree(v); return minor; @@ -1161,6 +1611,9 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) init_completion(&v->completion); vdpa_set_drvdata(vdpa, v); + for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++) + INIT_HLIST_HEAD(&v->as[i]); + return 0; err: |
