summaryrefslogtreecommitdiff
path: root/drivers/vhost/vdpa.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vhost/vdpa.c')
-rw-r--r--drivers/vhost/vdpa.c669
1 files changed, 561 insertions, 108 deletions
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 851539807bc9..05a481e4c385 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -28,32 +28,141 @@
enum {
VHOST_VDPA_BACKEND_FEATURES =
(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
- (1ULL << VHOST_BACKEND_F_IOTLB_BATCH),
+ (1ULL << VHOST_BACKEND_F_IOTLB_BATCH) |
+ (1ULL << VHOST_BACKEND_F_IOTLB_ASID),
};
#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)
+#define VHOST_VDPA_IOTLB_BUCKETS 16
+
+struct vhost_vdpa_as {
+ struct hlist_node hash_link;
+ struct vhost_iotlb iotlb;
+ u32 id;
+};
+
struct vhost_vdpa {
struct vhost_dev vdev;
struct iommu_domain *domain;
struct vhost_virtqueue *vqs;
struct completion completion;
struct vdpa_device *vdpa;
+ struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
struct device dev;
struct cdev cdev;
atomic_t opened;
- int nvqs;
+ u32 nvqs;
int virtio_id;
int minor;
struct eventfd_ctx *config_ctx;
int in_batch;
struct vdpa_iova_range range;
+ u32 batch_asid;
+ bool suspended;
};
static DEFINE_IDA(vhost_vdpa_ida);
static dev_t vhost_vdpa_major;
+static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
+ struct vhost_iotlb *iotlb, u64 start,
+ u64 last, u32 asid);
+
+static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb)
+{
+ struct vhost_vdpa_as *as = container_of(iotlb, struct
+ vhost_vdpa_as, iotlb);
+ return as->id;
+}
+
+static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid)
+{
+ struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
+ struct vhost_vdpa_as *as;
+
+ hlist_for_each_entry(as, head, hash_link)
+ if (as->id == asid)
+ return as;
+
+ return NULL;
+}
+
+static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid)
+{
+ struct vhost_vdpa_as *as = asid_to_as(v, asid);
+
+ if (!as)
+ return NULL;
+
+ return &as->iotlb;
+}
+
+static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid)
+{
+ struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
+ struct vhost_vdpa_as *as;
+
+ if (asid_to_as(v, asid))
+ return NULL;
+
+ if (asid >= v->vdpa->nas)
+ return NULL;
+
+ as = kmalloc(sizeof(*as), GFP_KERNEL);
+ if (!as)
+ return NULL;
+
+ vhost_iotlb_init(&as->iotlb, 0, 0);
+ as->id = asid;
+ hlist_add_head(&as->hash_link, head);
+
+ return as;
+}
+
+static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
+ u32 asid)
+{
+ struct vhost_vdpa_as *as = asid_to_as(v, asid);
+
+ if (as)
+ return as;
+
+ return vhost_vdpa_alloc_as(v, asid);
+}
+
+static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid)
+{
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+
+ if (ops->reset_map)
+ ops->reset_map(vdpa, asid);
+}
+
+static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
+{
+ struct vhost_vdpa_as *as = asid_to_as(v, asid);
+
+ if (!as)
+ return -EINVAL;
+
+ hlist_del(&as->hash_link);
+ vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);
+ /*
+ * Devices with vendor specific IOMMU may need to restore
+ * iotlb to the initial or default state, which cannot be
+ * cleaned up in the all range unmap call above. Give them
+ * a chance to clean up or reset the map to the desired
+ * state.
+ */
+ vhost_vdpa_reset_map(v, asid);
+ kfree(as);
+
+ return 0;
+}
+
static void handle_vq_kick(struct vhost_work *work)
{
struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
@@ -70,7 +179,7 @@ static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;
if (call_ctx)
- eventfd_signal(call_ctx, 1);
+ eventfd_signal(call_ctx);
return IRQ_HANDLED;
}
@@ -81,7 +190,7 @@ static irqreturn_t vhost_vdpa_config_cb(void *private)
struct eventfd_ctx *config_ctx = v->config_ctx;
if (config_ctx)
- eventfd_signal(config_ctx, 1);
+ eventfd_signal(config_ctx);
return IRQ_HANDLED;
}
@@ -97,16 +206,17 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
return;
irq = ops->get_vq_irq(vdpa, qid);
- irq_bypass_unregister_producer(&vq->call_ctx.producer);
- if (!vq->call_ctx.ctx || irq < 0)
+ if (irq < 0)
return;
- vq->call_ctx.producer.token = vq->call_ctx.ctx;
- vq->call_ctx.producer.irq = irq;
- ret = irq_bypass_register_producer(&vq->call_ctx.producer);
+ if (!vq->call_ctx.ctx)
+ return;
+
+ ret = irq_bypass_register_producer(&vq->call_ctx.producer,
+ vq->call_ctx.ctx, irq);
if (unlikely(ret))
- dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n",
- qid, vq->call_ctx.producer.token, ret);
+ dev_info(&v->dev, "vq %u, irq bypass producer (eventfd %p) registration fails, ret = %d\n",
+ qid, vq->call_ctx.ctx, ret);
}
static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
@@ -116,13 +226,48 @@ static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
irq_bypass_unregister_producer(&vq->call_ctx.producer);
}
-static int vhost_vdpa_reset(struct vhost_vdpa *v)
+static int _compat_vdpa_reset(struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
+ u32 flags = 0;
+
+ v->suspended = false;
+
+ if (v->vdev.vqs) {
+ flags |= !vhost_backend_has_feature(v->vdev.vqs[0],
+ VHOST_BACKEND_F_IOTLB_PERSIST) ?
+ VDPA_RESET_F_CLEAN_MAP : 0;
+ }
+
+ return vdpa_reset(vdpa, flags);
+}
+static int vhost_vdpa_reset(struct vhost_vdpa *v)
+{
v->in_batch = 0;
+ return _compat_vdpa_reset(v);
+}
- return vdpa_reset(vdpa);
+static long vhost_vdpa_bind_mm(struct vhost_vdpa *v)
+{
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+
+ if (!vdpa->use_va || !ops->bind_mm)
+ return 0;
+
+ return ops->bind_mm(vdpa, v->vdev.mm);
+}
+
+static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v)
+{
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+
+ if (!vdpa->use_va || !ops->unbind_mm)
+ return;
+
+ ops->unbind_mm(vdpa);
}
static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
@@ -158,7 +303,8 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
u8 status, status_old;
- int ret, nvqs = v->nvqs;
+ u32 nvqs = v->nvqs;
+ int ret;
u16 i;
if (copy_from_user(&status, statusp, sizeof(status)))
@@ -178,7 +324,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
vhost_vdpa_unsetup_vq_irq(v, i);
if (status == 0) {
- ret = vdpa_reset(vdpa);
+ ret = _compat_vdpa_reset(v);
if (ret)
return ret;
} else
@@ -256,6 +402,30 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v,
return 0;
}
+static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
+{
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+
+ return ops->suspend;
+}
+
+static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v)
+{
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+
+ return ops->resume;
+}
+
+static bool vhost_vdpa_has_desc_group(const struct vhost_vdpa *v)
+{
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+
+ return ops->get_vq_desc_group;
+}
+
static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
struct vdpa_device *vdpa = v->vdpa;
@@ -270,11 +440,34 @@ static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
return 0;
}
+static u64 vhost_vdpa_get_backend_features(const struct vhost_vdpa *v)
+{
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+
+ if (!ops->get_backend_features)
+ return 0;
+ else
+ return ops->get_backend_features(vdpa);
+}
+
+static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v)
+{
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+
+ return (!ops->set_map && !ops->dma_map) || ops->reset_map ||
+ vhost_vdpa_get_backend_features(v) & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
+}
+
static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
+ struct vhost_dev *d = &v->vdev;
+ u64 actual_features;
u64 features;
+ int i;
/*
* It's not allowed to change the features after they have
@@ -286,9 +479,19 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
if (copy_from_user(&features, featurep, sizeof(features)))
return -EFAULT;
- if (vdpa_set_features(vdpa, features, false))
+ if (vdpa_set_features(vdpa, features))
return -EINVAL;
+ /* let the vqs know what has been configured */
+ actual_features = ops->get_driver_features(vdpa);
+ for (i = 0; i < d->nvqs; ++i) {
+ struct vhost_virtqueue *vq = d->vqs[i];
+
+ mutex_lock(&vq->mutex);
+ vq->acked_features = actual_features;
+ mutex_unlock(&vq->mutex);
+ }
+
return 0;
}
@@ -355,6 +558,77 @@ static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
return 0;
}
+static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp)
+{
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+ u32 size;
+
+ size = ops->get_config_size(vdpa);
+
+ if (copy_to_user(argp, &size, sizeof(size)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
+{
+ struct vdpa_device *vdpa = v->vdpa;
+
+ if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs)))
+ return -EFAULT;
+
+ return 0;
+}
+
+/* After a successful return of ioctl the device must not process more
+ * virtqueue descriptors. The device can answer to read or writes of config
+ * fields as if it were not suspended. In particular, writing to "queue_enable"
+ * with a value of 1 will not make the device start processing buffers.
+ */
+static long vhost_vdpa_suspend(struct vhost_vdpa *v)
+{
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+ int ret;
+
+ if (!(ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK))
+ return 0;
+
+ if (!ops->suspend)
+ return -EOPNOTSUPP;
+
+ ret = ops->suspend(vdpa);
+ if (!ret)
+ v->suspended = true;
+
+ return ret;
+}
+
+/* After a successful return of this ioctl the device resumes processing
+ * virtqueue descriptors. The device becomes fully operational the same way it
+ * was before it was suspended.
+ */
+static long vhost_vdpa_resume(struct vhost_vdpa *v)
+{
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+ int ret;
+
+ if (!(ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK))
+ return 0;
+
+ if (!ops->resume)
+ return -EOPNOTSUPP;
+
+ ret = ops->resume(vdpa);
+ if (!ret)
+ v->suspended = false;
+
+ return ret;
+}
+
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
void __user *argp)
{
@@ -383,12 +657,62 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
return -EFAULT;
ops->set_vq_ready(vdpa, idx, s.num);
return 0;
+ case VHOST_VDPA_GET_VRING_GROUP:
+ if (!ops->get_vq_group)
+ return -EOPNOTSUPP;
+ s.index = idx;
+ s.num = ops->get_vq_group(vdpa, idx);
+ if (s.num >= vdpa->ngroups)
+ return -EIO;
+ else if (copy_to_user(argp, &s, sizeof(s)))
+ return -EFAULT;
+ return 0;
+ case VHOST_VDPA_GET_VRING_DESC_GROUP:
+ if (!vhost_vdpa_has_desc_group(v))
+ return -EOPNOTSUPP;
+ s.index = idx;
+ s.num = ops->get_vq_desc_group(vdpa, idx);
+ if (s.num >= vdpa->ngroups)
+ return -EIO;
+ else if (copy_to_user(argp, &s, sizeof(s)))
+ return -EFAULT;
+ return 0;
+ case VHOST_VDPA_SET_GROUP_ASID:
+ if (copy_from_user(&s, argp, sizeof(s)))
+ return -EFAULT;
+ if (s.num >= vdpa->nas)
+ return -EINVAL;
+ if (!ops->set_group_asid)
+ return -EOPNOTSUPP;
+ return ops->set_group_asid(vdpa, idx, s.num);
+ case VHOST_VDPA_GET_VRING_SIZE:
+ if (!ops->get_vq_size)
+ return -EOPNOTSUPP;
+ s.index = idx;
+ s.num = ops->get_vq_size(vdpa, idx);
+ if (copy_to_user(argp, &s, sizeof(s)))
+ return -EFAULT;
+ return 0;
case VHOST_GET_VRING_BASE:
r = ops->get_vq_state(v->vdpa, idx, &vq_state);
if (r)
return r;
- vq->last_avail_idx = vq_state.split.avail_index;
+ if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+ vq->last_avail_idx = vq_state.packed.last_avail_idx |
+ (vq_state.packed.last_avail_counter << 15);
+ vq->last_used_idx = vq_state.packed.last_used_idx |
+ (vq_state.packed.last_used_counter << 15);
+ } else {
+ vq->last_avail_idx = vq_state.split.avail_index;
+ }
+ break;
+ case VHOST_SET_VRING_CALL:
+ if (vq->call_ctx.ctx) {
+ if (ops->get_status(vdpa) &
+ VIRTIO_CONFIG_S_DRIVER_OK)
+ vhost_vdpa_unsetup_vq_irq(v, idx);
+ }
break;
}
@@ -398,6 +722,9 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
switch (cmd) {
case VHOST_SET_VRING_ADDR:
+ if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
+ return -EINVAL;
+
if (ops->set_vq_address(vdpa, idx,
(u64)(uintptr_t)vq->desc,
(u64)(uintptr_t)vq->avail,
@@ -406,21 +733,34 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
break;
case VHOST_SET_VRING_BASE:
- vq_state.split.avail_index = vq->last_avail_idx;
- if (ops->set_vq_state(vdpa, idx, &vq_state))
- r = -EINVAL;
+ if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
+ return -EINVAL;
+
+ if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+ vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
+ vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
+ vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff;
+ vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000);
+ } else {
+ vq_state.split.avail_index = vq->last_avail_idx;
+ }
+ r = ops->set_vq_state(vdpa, idx, &vq_state);
break;
case VHOST_SET_VRING_CALL:
if (vq->call_ctx.ctx) {
cb.callback = vhost_vdpa_virtqueue_cb;
cb.private = vq;
+ cb.trigger = vq->call_ctx.ctx;
+ if (ops->get_status(vdpa) &
+ VIRTIO_CONFIG_S_DRIVER_OK)
+ vhost_vdpa_setup_vq_irq(v, idx);
} else {
cb.callback = NULL;
cb.private = NULL;
+ cb.trigger = NULL;
}
ops->set_vq_cb(vdpa, idx, &cb);
- vhost_vdpa_setup_vq_irq(v, idx);
break;
case VHOST_SET_VRING_NUM:
@@ -444,7 +784,27 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
if (cmd == VHOST_SET_BACKEND_FEATURES) {
if (copy_from_user(&features, featurep, sizeof(features)))
return -EFAULT;
- if (features & ~VHOST_VDPA_BACKEND_FEATURES)
+ if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
+ BIT_ULL(VHOST_BACKEND_F_DESC_ASID) |
+ BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST) |
+ BIT_ULL(VHOST_BACKEND_F_SUSPEND) |
+ BIT_ULL(VHOST_BACKEND_F_RESUME) |
+ BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK)))
+ return -EOPNOTSUPP;
+ if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
+ !vhost_vdpa_can_suspend(v))
+ return -EOPNOTSUPP;
+ if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) &&
+ !vhost_vdpa_can_resume(v))
+ return -EOPNOTSUPP;
+ if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
+ !(features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)))
+ return -EINVAL;
+ if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
+ !vhost_vdpa_has_desc_group(v))
+ return -EOPNOTSUPP;
+ if ((features & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST)) &&
+ !vhost_vdpa_has_persistent_map(v))
return -EOPNOTSUPP;
vhost_set_backend_features(&v->vdev, features);
return 0;
@@ -477,6 +837,15 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
case VHOST_VDPA_GET_VRING_NUM:
r = vhost_vdpa_get_vring_num(v, argp);
break;
+ case VHOST_VDPA_GET_GROUP_NUM:
+ if (copy_to_user(argp, &v->vdpa->ngroups,
+ sizeof(v->vdpa->ngroups)))
+ r = -EFAULT;
+ break;
+ case VHOST_VDPA_GET_AS_NUM:
+ if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)))
+ r = -EFAULT;
+ break;
case VHOST_SET_LOG_BASE:
case VHOST_SET_LOG_FD:
r = -ENOIOCTLCMD;
@@ -486,12 +855,33 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
break;
case VHOST_GET_BACKEND_FEATURES:
features = VHOST_VDPA_BACKEND_FEATURES;
+ if (vhost_vdpa_can_suspend(v))
+ features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
+ if (vhost_vdpa_can_resume(v))
+ features |= BIT_ULL(VHOST_BACKEND_F_RESUME);
+ if (vhost_vdpa_has_desc_group(v))
+ features |= BIT_ULL(VHOST_BACKEND_F_DESC_ASID);
+ if (vhost_vdpa_has_persistent_map(v))
+ features |= BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
+ features |= vhost_vdpa_get_backend_features(v);
if (copy_to_user(featurep, &features, sizeof(features)))
r = -EFAULT;
break;
case VHOST_VDPA_GET_IOVA_RANGE:
r = vhost_vdpa_get_iova_range(v, argp);
break;
+ case VHOST_VDPA_GET_CONFIG_SIZE:
+ r = vhost_vdpa_get_config_size(v, argp);
+ break;
+ case VHOST_VDPA_GET_VQS_COUNT:
+ r = vhost_vdpa_get_vqs_count(v, argp);
+ break;
+ case VHOST_VDPA_SUSPEND:
+ r = vhost_vdpa_suspend(v);
+ break;
+ case VHOST_VDPA_RESUME:
+ r = vhost_vdpa_resume(v);
+ break;
default:
r = vhost_dev_ioctl(&v->vdev, cmd, argp);
if (r == -ENOIOCTLCMD)
@@ -499,14 +889,36 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
break;
}
+ if (r)
+ goto out;
+
+ switch (cmd) {
+ case VHOST_SET_OWNER:
+ r = vhost_vdpa_bind_mm(v);
+ if (r)
+ vhost_dev_reset_owner(d, NULL);
+ break;
+ }
+out:
mutex_unlock(&d->mutex);
return r;
}
+static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
+ struct vhost_iotlb_map *map, u32 asid)
+{
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+ if (ops->dma_map) {
+ ops->dma_unmap(vdpa, asid, map->start, map->size);
+ } else if (ops->set_map == NULL) {
+ iommu_unmap(v->domain, map->start, map->size);
+ }
+}
-static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
+static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
+ u64 start, u64 last, u32 asid)
{
struct vhost_dev *dev = &v->vdev;
- struct vhost_iotlb *iotlb = dev->iotlb;
struct vhost_iotlb_map *map;
struct page *page;
unsigned long pfn, pinned;
@@ -521,14 +933,14 @@ static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
unpin_user_page(page);
}
atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
+ vhost_vdpa_general_unmap(v, map, asid);
vhost_iotlb_map_free(iotlb, map);
}
}
-static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, u64 start, u64 last)
+static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
+ u64 start, u64 last, u32 asid)
{
- struct vhost_dev *dev = &v->vdev;
- struct vhost_iotlb *iotlb = dev->iotlb;
struct vhost_iotlb_map *map;
struct vdpa_map_file *map_file;
@@ -536,27 +948,21 @@ static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, u64 start, u64 last)
map_file = (struct vdpa_map_file *)map->opaque;
fput(map_file->file);
kfree(map_file);
+ vhost_vdpa_general_unmap(v, map, asid);
vhost_iotlb_map_free(iotlb, map);
}
}
-static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
+static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
+ struct vhost_iotlb *iotlb, u64 start,
+ u64 last, u32 asid)
{
struct vdpa_device *vdpa = v->vdpa;
if (vdpa->use_va)
- return vhost_vdpa_va_unmap(v, start, last);
+ return vhost_vdpa_va_unmap(v, iotlb, start, last, asid);
- return vhost_vdpa_pa_unmap(v, start, last);
-}
-
-static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v)
-{
- struct vhost_dev *dev = &v->vdev;
-
- vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1);
- kfree(dev->iotlb);
- dev->iotlb = NULL;
+ return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid);
}
static int perm_to_iommu_flags(u32 perm)
@@ -581,30 +987,32 @@ static int perm_to_iommu_flags(u32 perm)
return flags | IOMMU_CACHE;
}
-static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
- u64 size, u64 pa, u32 perm, void *opaque)
+static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
+ u64 iova, u64 size, u64 pa, u32 perm, void *opaque)
{
struct vhost_dev *dev = &v->vdev;
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
+ u32 asid = iotlb_to_asid(iotlb);
int r = 0;
- r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1,
+ r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1,
pa, perm, opaque);
if (r)
return r;
if (ops->dma_map) {
- r = ops->dma_map(vdpa, iova, size, pa, perm, opaque);
+ r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
} else if (ops->set_map) {
if (!v->in_batch)
- r = ops->set_map(vdpa, dev->iotlb);
+ r = ops->set_map(vdpa, asid, iotlb);
} else {
r = iommu_map(v->domain, iova, pa, size,
- perm_to_iommu_flags(perm));
+ perm_to_iommu_flags(perm),
+ GFP_KERNEL_ACCOUNT);
}
if (r) {
- vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
+ vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
return r;
}
@@ -614,25 +1022,25 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
return 0;
}
-static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
+static void vhost_vdpa_unmap(struct vhost_vdpa *v,
+ struct vhost_iotlb *iotlb,
+ u64 iova, u64 size)
{
- struct vhost_dev *dev = &v->vdev;
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
+ u32 asid = iotlb_to_asid(iotlb);
- vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1);
+ vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid);
- if (ops->dma_map) {
- ops->dma_unmap(vdpa, iova, size);
- } else if (ops->set_map) {
+ if (ops->set_map) {
if (!v->in_batch)
- ops->set_map(vdpa, dev->iotlb);
- } else {
- iommu_unmap(v->domain, iova, size);
+ ops->set_map(vdpa, asid, iotlb);
}
+
}
static int vhost_vdpa_va_map(struct vhost_vdpa *v,
+ struct vhost_iotlb *iotlb,
u64 iova, u64 size, u64 uaddr, u32 perm)
{
struct vhost_dev *dev = &v->vdev;
@@ -662,7 +1070,7 @@ static int vhost_vdpa_va_map(struct vhost_vdpa *v,
offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
map_file->offset = offset;
map_file->file = get_file(vma->vm_file);
- ret = vhost_vdpa_map(v, map_iova, map_size, uaddr,
+ ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr,
perm, map_file);
if (ret) {
fput(map_file->file);
@@ -675,7 +1083,7 @@ next:
map_iova += map_size;
}
if (ret)
- vhost_vdpa_unmap(v, iova, map_iova - iova);
+ vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova);
mmap_read_unlock(dev->mm);
@@ -683,6 +1091,7 @@ next:
}
static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
+ struct vhost_iotlb *iotlb,
u64 iova, u64 size, u64 uaddr, u32 perm)
{
struct vhost_dev *dev = &v->vdev;
@@ -724,7 +1133,7 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
while (npages) {
sz2pin = min_t(unsigned long, npages, list_size);
pinned = pin_user_pages(cur_base, sz2pin,
- gup_flags, page_list, NULL);
+ gup_flags, page_list);
if (sz2pin != pinned) {
if (pinned < 0) {
ret = pinned;
@@ -746,7 +1155,7 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
if (last_pfn && (this_pfn != last_pfn + 1)) {
/* Pin a contiguous chunk of memory */
csize = PFN_PHYS(last_pfn - map_pfn + 1);
- ret = vhost_vdpa_map(v, iova, csize,
+ ret = vhost_vdpa_map(v, iotlb, iova, csize,
PFN_PHYS(map_pfn),
perm, NULL);
if (ret) {
@@ -776,7 +1185,7 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
}
/* Pin the rest chunk */
- ret = vhost_vdpa_map(v, iova, PFN_PHYS(last_pfn - map_pfn + 1),
+ ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1),
PFN_PHYS(map_pfn), perm, NULL);
out:
if (ret) {
@@ -796,7 +1205,7 @@ out:
for (pfn = map_pfn; pfn <= last_pfn; pfn++)
unpin_user_page(pfn_to_page(pfn));
}
- vhost_vdpa_unmap(v, start, size);
+ vhost_vdpa_unmap(v, iotlb, start, size);
}
unlock:
mmap_read_unlock(dev->mm);
@@ -807,11 +1216,10 @@ free:
}
static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
+ struct vhost_iotlb *iotlb,
struct vhost_iotlb_msg *msg)
{
- struct vhost_dev *dev = &v->vdev;
struct vdpa_device *vdpa = v->vdpa;
- struct vhost_iotlb *iotlb = dev->iotlb;
if (msg->iova < v->range.first || !msg->size ||
msg->iova > U64_MAX - msg->size + 1 ||
@@ -823,19 +1231,21 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
return -EEXIST;
if (vdpa->use_va)
- return vhost_vdpa_va_map(v, msg->iova, msg->size,
+ return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size,
msg->uaddr, msg->perm);
- return vhost_vdpa_pa_map(v, msg->iova, msg->size, msg->uaddr,
+ return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr,
msg->perm);
}
-static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
+static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
struct vhost_iotlb_msg *msg)
{
struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
+ struct vhost_iotlb *iotlb = NULL;
+ struct vhost_vdpa_as *as = NULL;
int r = 0;
mutex_lock(&dev->mutex);
@@ -844,19 +1254,44 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
if (r)
goto unlock;
+ if (msg->type == VHOST_IOTLB_UPDATE ||
+ msg->type == VHOST_IOTLB_BATCH_BEGIN) {
+ as = vhost_vdpa_find_alloc_as(v, asid);
+ if (!as) {
+ dev_err(&v->dev, "can't find and alloc asid %d\n",
+ asid);
+ r = -EINVAL;
+ goto unlock;
+ }
+ iotlb = &as->iotlb;
+ } else
+ iotlb = asid_to_iotlb(v, asid);
+
+ if ((v->in_batch && v->batch_asid != asid) || !iotlb) {
+ if (v->in_batch && v->batch_asid != asid) {
+ dev_info(&v->dev, "batch id %d asid %d\n",
+ v->batch_asid, asid);
+ }
+ if (!iotlb)
+ dev_err(&v->dev, "no iotlb for asid %d\n", asid);
+ r = -EINVAL;
+ goto unlock;
+ }
+
switch (msg->type) {
case VHOST_IOTLB_UPDATE:
- r = vhost_vdpa_process_iotlb_update(v, msg);
+ r = vhost_vdpa_process_iotlb_update(v, iotlb, msg);
break;
case VHOST_IOTLB_INVALIDATE:
- vhost_vdpa_unmap(v, msg->iova, msg->size);
+ vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size);
break;
case VHOST_IOTLB_BATCH_BEGIN:
+ v->batch_asid = asid;
v->in_batch = true;
break;
case VHOST_IOTLB_BATCH_END:
if (v->in_batch && ops->set_map)
- ops->set_map(vdpa, dev->iotlb);
+ ops->set_map(vdpa, asid, iotlb);
v->in_batch = false;
break;
default:
@@ -883,24 +1318,26 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
- struct device *dma_dev = vdpa_get_dma_dev(vdpa);
- struct bus_type *bus;
+ union virtio_map map = vdpa_get_map(vdpa);
+ struct device *dma_dev = map.dma_dev;
int ret;
/* Device want to do DMA by itself */
if (ops->set_map || ops->dma_map)
return 0;
- bus = dma_dev->bus;
- if (!bus)
- return -EFAULT;
-
- if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
+ if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) {
+ dev_warn_once(&v->dev,
+ "Failed to allocate domain, device is not IOMMU cache coherent capable\n");
return -ENOTSUPP;
+ }
- v->domain = iommu_domain_alloc(bus);
- if (!v->domain)
- return -EIO;
+ v->domain = iommu_paging_domain_alloc(dma_dev);
+ if (IS_ERR(v->domain)) {
+ ret = PTR_ERR(v->domain);
+ v->domain = NULL;
+ return ret;
+ }
ret = iommu_attach_device(v->domain, dma_dev);
if (ret)
@@ -910,13 +1347,15 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
err_attach:
iommu_domain_free(v->domain);
+ v->domain = NULL;
return ret;
}
static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
- struct device *dma_dev = vdpa_get_dma_dev(vdpa);
+ union virtio_map map = vdpa_get_map(vdpa);
+ struct device *dma_dev = map.dma_dev;
if (v->domain) {
iommu_detach_device(v->domain, dma_dev);
@@ -943,12 +1382,30 @@ static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
}
}
+static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
+{
+ struct vhost_vdpa_as *as;
+ u32 asid;
+
+ for (asid = 0; asid < v->vdpa->nas; asid++) {
+ as = asid_to_as(v, asid);
+ if (as)
+ vhost_vdpa_remove_as(v, asid);
+ }
+
+ vhost_vdpa_free_domain(v);
+ vhost_dev_cleanup(&v->vdev);
+ kfree(v->vdev.vqs);
+ v->vdev.vqs = NULL;
+}
+
static int vhost_vdpa_open(struct inode *inode, struct file *filep)
{
struct vhost_vdpa *v;
struct vhost_dev *dev;
struct vhost_virtqueue **vqs;
- int nvqs, i, r, opened;
+ int r, opened;
+ u32 i, nvqs;
v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);
@@ -971,19 +1428,14 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
for (i = 0; i < nvqs; i++) {
vqs[i] = &v->vqs[i];
vqs[i]->handle_kick = handle_vq_kick;
+ vqs[i]->call_ctx.ctx = NULL;
}
vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
vhost_vdpa_process_iotlb_msg);
- dev->iotlb = vhost_iotlb_alloc(0, 0);
- if (!dev->iotlb) {
- r = -ENOMEM;
- goto err_init_iotlb;
- }
-
r = vhost_vdpa_alloc_domain(v);
if (r)
- goto err_init_iotlb;
+ goto err_alloc_domain;
vhost_vdpa_set_iova_range(v);
@@ -991,9 +1443,8 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
return 0;
-err_init_iotlb:
- vhost_dev_cleanup(&v->vdev);
- kfree(vqs);
+err_alloc_domain:
+ vhost_vdpa_cleanup(v);
err:
atomic_dec(&v->opened);
return r;
@@ -1001,7 +1452,7 @@ err:
static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
{
- int i;
+ u32 i;
for (i = 0; i < v->nvqs; i++)
vhost_vdpa_unsetup_vq_irq(v, i);
@@ -1017,11 +1468,9 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep)
vhost_vdpa_clean_irq(v);
vhost_vdpa_reset(v);
vhost_dev_stop(&v->vdev);
- vhost_vdpa_iotlb_free(v);
- vhost_vdpa_free_domain(v);
+ vhost_vdpa_unbind_mm(v);
vhost_vdpa_config_put(v);
- vhost_dev_cleanup(&v->vdev);
- kfree(v->vdev.vqs);
+ vhost_vdpa_cleanup(v);
mutex_unlock(&d->mutex);
atomic_dec(&v->opened);
@@ -1042,13 +1491,7 @@ static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
notify = ops->get_vq_notification(vdpa, index);
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
- PFN_DOWN(notify.addr), PAGE_SIZE,
- vma->vm_page_prot))
- return VM_FAULT_SIGBUS;
-
- return VM_FAULT_NOPAGE;
+ return vmf_insert_pfn(vma, vmf->address & PAGE_MASK, PFN_DOWN(notify.addr));
}
static const struct vm_operations_struct vhost_vdpa_vm_ops = {
@@ -1084,7 +1527,7 @@ static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
if (vma->vm_end - vma->vm_start != notify.size)
return -ENOTSUPP;
- vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+ vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
vma->vm_ops = &vhost_vdpa_vm_ops;
return 0;
}
@@ -1107,7 +1550,7 @@ static void vhost_vdpa_release_dev(struct device *device)
struct vhost_vdpa *v =
container_of(device, struct vhost_vdpa, dev);
- ida_simple_remove(&vhost_vdpa_ida, v->minor);
+ ida_free(&vhost_vdpa_ida, v->minor);
kfree(v->vqs);
kfree(v);
}
@@ -1117,14 +1560,21 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
const struct vdpa_config_ops *ops = vdpa->config;
struct vhost_vdpa *v;
int minor;
- int r;
+ int i, r;
+
+ /* We can't support platform IOMMU device with more than 1
+ * group or as
+ */
+ if (!ops->set_map && !ops->dma_map &&
+ (vdpa->ngroups > 1 || vdpa->nas > 1))
+ return -EOPNOTSUPP;
v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!v)
return -ENOMEM;
- minor = ida_simple_get(&vhost_vdpa_ida, 0,
- VHOST_VDPA_DEV_MAX, GFP_KERNEL);
+ minor = ida_alloc_max(&vhost_vdpa_ida, VHOST_VDPA_DEV_MAX - 1,
+ GFP_KERNEL);
if (minor < 0) {
kfree(v);
return minor;
@@ -1161,6 +1611,9 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
init_completion(&v->completion);
vdpa_set_drvdata(vdpa, v);
+ for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++)
+ INIT_HLIST_HEAD(&v->as[i]);
+
return 0;
err: