summaryrefslogtreecommitdiff
path: root/drivers/vdpa
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vdpa')
-rw-r--r--drivers/vdpa/ifcvf/ifcvf_base.c14
-rw-r--r--drivers/vdpa/ifcvf/ifcvf_base.h2
-rw-r--r--drivers/vdpa/ifcvf/ifcvf_main.c144
-rw-r--r--drivers/vdpa/mlx5/core/mlx5_vdpa.h11
-rw-r--r--drivers/vdpa/mlx5/net/mlx5_vnet.c173
-rw-r--r--drivers/vdpa/vdpa.c14
-rw-r--r--drivers/vdpa/vdpa_sim/vdpa_sim.c18
-rw-r--r--drivers/vdpa/vdpa_sim/vdpa_sim.h1
-rw-r--r--drivers/vdpa/vdpa_sim/vdpa_sim_blk.c176
-rw-r--r--drivers/vdpa/vdpa_sim/vdpa_sim_net.c3
-rw-r--r--drivers/vdpa/vdpa_user/iova_domain.c102
-rw-r--r--drivers/vdpa/vdpa_user/iova_domain.h8
-rw-r--r--drivers/vdpa/vdpa_user/vduse_dev.c180
13 files changed, 706 insertions, 140 deletions
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c
index 48c4dadb0c7c..75a703b803a2 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.c
+++ b/drivers/vdpa/ifcvf/ifcvf_base.c
@@ -29,7 +29,6 @@ u16 ifcvf_set_config_vector(struct ifcvf_hw *hw, int vector)
{
struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
- cfg = hw->common_cfg;
vp_iowrite16(vector, &cfg->msix_config);
return vp_ioread16(&cfg->msix_config);
@@ -128,6 +127,7 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev)
break;
case VIRTIO_PCI_CAP_DEVICE_CFG:
hw->dev_cfg = get_cap_addr(hw, &cap);
+ hw->cap_dev_config_size = le32_to_cpu(cap.length);
IFCVF_DBG(pdev, "hw->dev_cfg = %p\n", hw->dev_cfg);
break;
}
@@ -233,15 +233,23 @@ int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features)
u32 ifcvf_get_config_size(struct ifcvf_hw *hw)
{
struct ifcvf_adapter *adapter;
+ u32 net_config_size = sizeof(struct virtio_net_config);
+ u32 blk_config_size = sizeof(struct virtio_blk_config);
+ u32 cap_size = hw->cap_dev_config_size;
u32 config_size;
adapter = vf_to_adapter(hw);
+ /* If the onboard device config space size is greater than
+ * the size of struct virtio_net/blk_config, only the spec
+ * implementing contents size is returned, this is very
+ * unlikely, defensive programming.
+ */
switch (hw->dev_type) {
case VIRTIO_ID_NET:
- config_size = sizeof(struct virtio_net_config);
+ config_size = min(cap_size, net_config_size);
break;
case VIRTIO_ID_BLOCK:
- config_size = sizeof(struct virtio_blk_config);
+ config_size = min(cap_size, blk_config_size);
break;
default:
config_size = 0;
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h
index 115b61f4924b..f5563f665cc6 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.h
+++ b/drivers/vdpa/ifcvf/ifcvf_base.h
@@ -87,6 +87,8 @@ struct ifcvf_hw {
int config_irq;
int vqs_reused_irq;
u16 nr_vring;
+ /* VIRTIO_PCI_CAP_DEVICE_CFG size */
+ u32 cap_dev_config_size;
};
struct ifcvf_adapter {
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index 0a5670729412..f9c0044c6442 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -685,7 +685,7 @@ static struct vdpa_notification_area ifcvf_get_vq_notification(struct vdpa_devic
}
/*
- * IFCVF currently does't have on-chip IOMMU, so not
+ * IFCVF currently doesn't have on-chip IOMMU, so not
* implemented set_map()/dma_map()/dma_unmap()
*/
static const struct vdpa_config_ops ifc_vdpa_ops = {
@@ -752,59 +752,36 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
{
struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
struct ifcvf_adapter *adapter;
+ struct vdpa_device *vdpa_dev;
struct pci_dev *pdev;
struct ifcvf_hw *vf;
- struct device *dev;
- int ret, i;
+ int ret;
ifcvf_mgmt_dev = container_of(mdev, struct ifcvf_vdpa_mgmt_dev, mdev);
- if (ifcvf_mgmt_dev->adapter)
+ if (!ifcvf_mgmt_dev->adapter)
return -EOPNOTSUPP;
- pdev = ifcvf_mgmt_dev->pdev;
- dev = &pdev->dev;
- adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
- dev, &ifc_vdpa_ops, 1, 1, name, false);
- if (IS_ERR(adapter)) {
- IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
- return PTR_ERR(adapter);
- }
-
- ifcvf_mgmt_dev->adapter = adapter;
-
+ adapter = ifcvf_mgmt_dev->adapter;
vf = &adapter->vf;
- vf->dev_type = get_dev_type(pdev);
- vf->base = pcim_iomap_table(pdev);
+ pdev = adapter->pdev;
+ vdpa_dev = &adapter->vdpa;
- adapter->pdev = pdev;
- adapter->vdpa.dma_dev = &pdev->dev;
-
- ret = ifcvf_init_hw(vf, pdev);
- if (ret) {
- IFCVF_ERR(pdev, "Failed to init IFCVF hw\n");
- goto err;
- }
-
- for (i = 0; i < vf->nr_vring; i++)
- vf->vring[i].irq = -EINVAL;
-
- vf->hw_features = ifcvf_get_hw_features(vf);
- vf->config_size = ifcvf_get_config_size(vf);
+ if (name)
+ ret = dev_set_name(&vdpa_dev->dev, "%s", name);
+ else
+ ret = dev_set_name(&vdpa_dev->dev, "vdpa%u", vdpa_dev->index);
- adapter->vdpa.mdev = &ifcvf_mgmt_dev->mdev;
ret = _vdpa_register_device(&adapter->vdpa, vf->nr_vring);
if (ret) {
+ put_device(&adapter->vdpa.dev);
IFCVF_ERR(pdev, "Failed to register to vDPA bus");
- goto err;
+ return ret;
}
return 0;
-
-err:
- put_device(&adapter->vdpa.dev);
- return ret;
}
+
static void ifcvf_vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
@@ -823,61 +800,94 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
struct device *dev = &pdev->dev;
+ struct ifcvf_adapter *adapter;
+ struct ifcvf_hw *vf;
u32 dev_type;
- int ret;
-
- ifcvf_mgmt_dev = kzalloc(sizeof(struct ifcvf_vdpa_mgmt_dev), GFP_KERNEL);
- if (!ifcvf_mgmt_dev) {
- IFCVF_ERR(pdev, "Failed to alloc memory for the vDPA management device\n");
- return -ENOMEM;
- }
-
- dev_type = get_dev_type(pdev);
- switch (dev_type) {
- case VIRTIO_ID_NET:
- ifcvf_mgmt_dev->mdev.id_table = id_table_net;
- break;
- case VIRTIO_ID_BLOCK:
- ifcvf_mgmt_dev->mdev.id_table = id_table_blk;
- break;
- default:
- IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", dev_type);
- ret = -EOPNOTSUPP;
- goto err;
- }
-
- ifcvf_mgmt_dev->mdev.ops = &ifcvf_vdpa_mgmt_dev_ops;
- ifcvf_mgmt_dev->mdev.device = dev;
- ifcvf_mgmt_dev->pdev = pdev;
+ int ret, i;
ret = pcim_enable_device(pdev);
if (ret) {
IFCVF_ERR(pdev, "Failed to enable device\n");
- goto err;
+ return ret;
}
-
ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4),
IFCVF_DRIVER_NAME);
if (ret) {
IFCVF_ERR(pdev, "Failed to request MMIO region\n");
- goto err;
+ return ret;
}
ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
if (ret) {
IFCVF_ERR(pdev, "No usable DMA configuration\n");
- goto err;
+ return ret;
}
ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev);
if (ret) {
IFCVF_ERR(pdev,
"Failed for adding devres for freeing irq vectors\n");
- goto err;
+ return ret;
}
pci_set_master(pdev);
+ adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
+ dev, &ifc_vdpa_ops, 1, 1, NULL, false);
+ if (IS_ERR(adapter)) {
+ IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
+ return PTR_ERR(adapter);
+ }
+
+ vf = &adapter->vf;
+ vf->dev_type = get_dev_type(pdev);
+ vf->base = pcim_iomap_table(pdev);
+
+ adapter->pdev = pdev;
+ adapter->vdpa.dma_dev = &pdev->dev;
+
+ ret = ifcvf_init_hw(vf, pdev);
+ if (ret) {
+ IFCVF_ERR(pdev, "Failed to init IFCVF hw\n");
+ return ret;
+ }
+
+ for (i = 0; i < vf->nr_vring; i++)
+ vf->vring[i].irq = -EINVAL;
+
+ vf->hw_features = ifcvf_get_hw_features(vf);
+ vf->config_size = ifcvf_get_config_size(vf);
+
+ ifcvf_mgmt_dev = kzalloc(sizeof(struct ifcvf_vdpa_mgmt_dev), GFP_KERNEL);
+ if (!ifcvf_mgmt_dev) {
+ IFCVF_ERR(pdev, "Failed to alloc memory for the vDPA management device\n");
+ return -ENOMEM;
+ }
+
+ ifcvf_mgmt_dev->mdev.ops = &ifcvf_vdpa_mgmt_dev_ops;
+ ifcvf_mgmt_dev->mdev.device = dev;
+ ifcvf_mgmt_dev->adapter = adapter;
+
+ dev_type = get_dev_type(pdev);
+ switch (dev_type) {
+ case VIRTIO_ID_NET:
+ ifcvf_mgmt_dev->mdev.id_table = id_table_net;
+ break;
+ case VIRTIO_ID_BLOCK:
+ ifcvf_mgmt_dev->mdev.id_table = id_table_blk;
+ break;
+ default:
+ IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", dev_type);
+ ret = -EOPNOTSUPP;
+ goto err;
+ }
+
+ ifcvf_mgmt_dev->mdev.max_supported_vqs = vf->nr_vring;
+ ifcvf_mgmt_dev->mdev.supported_features = vf->hw_features;
+
+ adapter->vdpa.mdev = &ifcvf_mgmt_dev->mdev;
+
+
ret = vdpa_mgmtdev_register(&ifcvf_mgmt_dev->mdev);
if (ret) {
IFCVF_ERR(pdev,
diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index 44104093163b..6af9fdbb86b7 100644
--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
@@ -70,6 +70,16 @@ struct mlx5_vdpa_wq_ent {
struct mlx5_vdpa_dev *mvdev;
};
+enum {
+ MLX5_VDPA_DATAVQ_GROUP,
+ MLX5_VDPA_CVQ_GROUP,
+ MLX5_VDPA_NUMVQ_GROUPS
+};
+
+enum {
+ MLX5_VDPA_NUM_AS = MLX5_VDPA_NUMVQ_GROUPS
+};
+
struct mlx5_vdpa_dev {
struct vdpa_device vdev;
struct mlx5_core_dev *mdev;
@@ -85,6 +95,7 @@ struct mlx5_vdpa_dev {
struct mlx5_vdpa_mr mr;
struct mlx5_control_vq cvq;
struct workqueue_struct *wq;
+ unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS];
};
int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index e85c1d71f4ed..ed100a35e596 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -164,6 +164,7 @@ struct mlx5_vdpa_net {
bool setup;
u32 cur_num_vqs;
u32 rqt_size;
+ bool nb_registered;
struct notifier_block nb;
struct vdpa_callback config_cb;
struct mlx5_vdpa_wq_ent cvq_ent;
@@ -895,6 +896,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
if (err)
goto err_cmd;
+ mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
kfree(in);
mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
@@ -922,6 +924,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
return;
}
+ mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
umems_destroy(ndev, mvq);
}
@@ -1121,6 +1124,20 @@ err_cmd:
return err;
}
+static bool is_valid_state_change(int oldstate, int newstate)
+{
+ switch (oldstate) {
+ case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
+ return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
+ case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
+ return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
+ case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
+ case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
+ default:
+ return false;
+ }
+}
+
static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
{
int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
@@ -1130,6 +1147,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
void *in;
int err;
+ if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
+ return 0;
+
+ if (!is_valid_state_change(mvq->fw_state, state))
+ return -EINVAL;
+
in = kzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -1440,7 +1463,7 @@ static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
- memset(dmac_c, 0xff, ETH_ALEN);
+ eth_broadcast_addr(dmac_c);
ether_addr_copy(dmac_v, mac);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
if (tagged) {
@@ -1992,6 +2015,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
struct mlx5_vdpa_virtqueue *mvq;
+ int err;
if (!mvdev->actual_features)
return;
@@ -2005,8 +2029,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
}
mvq = &ndev->vqs[idx];
- if (!ready)
+ if (!ready) {
suspend_vq(ndev, mvq);
+ } else {
+ err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
+ if (err) {
+ mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
+ ready = false;
+ }
+ }
+
mvq->ready = ready;
}
@@ -2095,9 +2127,14 @@ static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
return PAGE_SIZE;
}
-static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdpa, u16 idx)
+static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
{
- return 0;
+ struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+
+ if (is_ctrl_vq_idx(mvdev, idx))
+ return MLX5_VDPA_CVQ_GROUP;
+
+ return MLX5_VDPA_DATAVQ_GROUP;
}
enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
@@ -2511,6 +2548,15 @@ err_clear:
up_write(&ndev->reslock);
}
+static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
+{
+ int i;
+
+ /* default mapping all groups are mapped to asid 0 */
+ for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
+ mvdev->group2asid[i] = 0;
+}
+
static int mlx5_vdpa_reset(struct vdpa_device *vdev)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
@@ -2529,7 +2575,9 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
ndev->mvdev.cvq.completed_desc = 0;
memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
ndev->mvdev.actual_features = 0;
+ init_group_to_asid_map(mvdev);
++mvdev->generation;
+
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
if (mlx5_vdpa_create_mr(mvdev, NULL))
mlx5_vdpa_warn(mvdev, "create MR failed\n");
@@ -2567,26 +2615,63 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
return mvdev->generation;
}
-static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
- struct vhost_iotlb *iotlb)
+static int set_map_control(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+{
+ u64 start = 0ULL, last = 0ULL - 1;
+ struct vhost_iotlb_map *map;
+ int err = 0;
+
+ spin_lock(&mvdev->cvq.iommu_lock);
+ vhost_iotlb_reset(mvdev->cvq.iotlb);
+
+ for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
+ map = vhost_iotlb_itree_next(map, start, last)) {
+ err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start,
+ map->last, map->addr, map->perm);
+ if (err)
+ goto out;
+ }
+
+out:
+ spin_unlock(&mvdev->cvq.iommu_lock);
+ return err;
+}
+
+static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
- struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
- struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
bool change_map;
int err;
- down_write(&ndev->reslock);
-
err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
if (err) {
mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
- goto err;
+ return err;
}
if (change_map)
err = mlx5_vdpa_change_map(mvdev, iotlb);
-err:
+ return err;
+}
+
+static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
+ struct vhost_iotlb *iotlb)
+{
+ struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+ struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+ int err = -EINVAL;
+
+ down_write(&ndev->reslock);
+ if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
+ err = set_map_data(mvdev, iotlb);
+ if (err)
+ goto out;
+ }
+
+ if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid)
+ err = set_map_control(mvdev, iotlb);
+
+out:
up_write(&ndev->reslock);
return err;
}
@@ -2733,6 +2818,49 @@ out_err:
return err;
}
+static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
+{
+ struct mlx5_control_vq *cvq;
+
+ if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
+ return;
+
+ cvq = &mvdev->cvq;
+ cvq->ready = false;
+}
+
+static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
+{
+ struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+ struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+ struct mlx5_vdpa_virtqueue *mvq;
+ int i;
+
+ down_write(&ndev->reslock);
+ mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
+ ndev->nb_registered = false;
+ flush_workqueue(ndev->mvdev.wq);
+ for (i = 0; i < ndev->cur_num_vqs; i++) {
+ mvq = &ndev->vqs[i];
+ suspend_vq(ndev, mvq);
+ }
+ mlx5_vdpa_cvq_suspend(mvdev);
+ up_write(&ndev->reslock);
+ return 0;
+}
+
+static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
+ unsigned int asid)
+{
+ struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+
+ if (group >= MLX5_VDPA_NUMVQ_GROUPS)
+ return -EINVAL;
+
+ mvdev->group2asid[group] = asid;
+ return 0;
+}
+
static const struct vdpa_config_ops mlx5_vdpa_ops = {
.set_vq_address = mlx5_vdpa_set_vq_address,
.set_vq_num = mlx5_vdpa_set_vq_num,
@@ -2762,7 +2890,9 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
.set_config = mlx5_vdpa_set_config,
.get_generation = mlx5_vdpa_get_generation,
.set_map = mlx5_vdpa_set_map,
+ .set_group_asid = mlx5_set_group_asid,
.free = mlx5_vdpa_free,
+ .suspend = mlx5_vdpa_suspend,
};
static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
@@ -2828,6 +2958,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
mvq->index = i;
mvq->ndev = ndev;
mvq->fwqp.fw = true;
+ mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
}
for (; i < ndev->mvdev.max_vqs; i++) {
mvq = &ndev->vqs[i];
@@ -2902,13 +3033,21 @@ static int event_handler(struct notifier_block *nb, unsigned long event, void *p
switch (eqe->sub_type) {
case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+ down_read(&ndev->reslock);
+ if (!ndev->nb_registered) {
+ up_read(&ndev->reslock);
+ return NOTIFY_DONE;
+ }
wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
- if (!wqent)
+ if (!wqent) {
+ up_read(&ndev->reslock);
return NOTIFY_DONE;
+ }
wqent->mvdev = &ndev->mvdev;
INIT_WORK(&wqent->work, update_carrier);
queue_work(ndev->mvdev.wq, &wqent->work);
+ up_read(&ndev->reslock);
ret = NOTIFY_OK;
break;
default:
@@ -2982,7 +3121,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
}
ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
- 1, 1, name, false);
+ MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
if (IS_ERR(ndev))
return PTR_ERR(ndev);
@@ -3062,6 +3201,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
ndev->nb.notifier_call = event_handler;
mlx5_notifier_register(mdev, &ndev->nb);
+ ndev->nb_registered = true;
mvdev->vdev.mdev = &mgtdev->mgtdev;
err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
if (err)
@@ -3093,7 +3233,10 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
struct workqueue_struct *wq;
- mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
+ if (ndev->nb_registered) {
+ mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
+ ndev->nb_registered = false;
+ }
wq = mvdev->wq;
mvdev->wq = NULL;
destroy_workqueue(wq);
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index ebf2f363fbe7..c06c02704461 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -824,11 +824,11 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms
config.mac))
return -EMSGSIZE;
- val_u16 = le16_to_cpu(config.status);
+ val_u16 = __virtio16_to_cpu(true, config.status);
if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16))
return -EMSGSIZE;
- val_u16 = le16_to_cpu(config.mtu);
+ val_u16 = __virtio16_to_cpu(true, config.mtu);
if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16))
return -EMSGSIZE;
@@ -846,17 +846,9 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid,
{
u32 device_id;
void *hdr;
- u8 status;
int err;
down_read(&vdev->cf_lock);
- status = vdev->config->get_status(vdev);
- if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
- NL_SET_ERR_MSG_MOD(extack, "Features negotiation not completed");
- err = -EAGAIN;
- goto out;
- }
-
hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
VDPA_CMD_DEV_CONFIG_GET);
if (!hdr) {
@@ -913,7 +905,7 @@ static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg,
}
vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config));
- max_vqp = le16_to_cpu(config.max_virtqueue_pairs);
+ max_vqp = __virtio16_to_cpu(true, config.max_virtqueue_pairs);
if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, max_vqp))
return -EMSGSIZE;
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 0f2865899647..225b7f5d8be3 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -33,7 +33,7 @@ MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable");
static int max_iotlb_entries = 2048;
module_param(max_iotlb_entries, int, 0444);
MODULE_PARM_DESC(max_iotlb_entries,
- "Maximum number of iotlb entries. 0 means unlimited. (default: 2048)");
+ "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
#define VDPASIM_QUEUE_ALIGN PAGE_SIZE
#define VDPASIM_QUEUE_MAX 256
@@ -107,6 +107,7 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim)
for (i = 0; i < vdpasim->dev_attr.nas; i++)
vhost_iotlb_reset(&vdpasim->iommu[i]);
+ vdpasim->running = true;
spin_unlock(&vdpasim->iommu_lock);
vdpasim->features = 0;
@@ -291,7 +292,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
goto err_iommu;
for (i = 0; i < vdpasim->dev_attr.nas; i++)
- vhost_iotlb_init(&vdpasim->iommu[i], 0, 0);
+ vhost_iotlb_init(&vdpasim->iommu[i], max_iotlb_entries, 0);
vdpasim->buffer = kvmalloc(dev_attr->buffer_size, GFP_KERNEL);
if (!vdpasim->buffer)
@@ -505,6 +506,17 @@ static int vdpasim_reset(struct vdpa_device *vdpa)
return 0;
}
+static int vdpasim_suspend(struct vdpa_device *vdpa)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ spin_lock(&vdpasim->lock);
+ vdpasim->running = false;
+ spin_unlock(&vdpasim->lock);
+
+ return 0;
+}
+
static size_t vdpasim_get_config_size(struct vdpa_device *vdpa)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
@@ -694,6 +706,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
.get_status = vdpasim_get_status,
.set_status = vdpasim_set_status,
.reset = vdpasim_reset,
+ .suspend = vdpasim_suspend,
.get_config_size = vdpasim_get_config_size,
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
@@ -726,6 +739,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
.get_status = vdpasim_get_status,
.set_status = vdpasim_set_status,
.reset = vdpasim_reset,
+ .suspend = vdpasim_suspend,
.get_config_size = vdpasim_get_config_size,
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
index 622782e92239..061986f30911 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
@@ -66,6 +66,7 @@ struct vdpasim {
u32 generation;
u64 features;
u32 groups;
+ bool running;
/* spinlock to synchronize iommu table */
spinlock_t iommu_lock;
};
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
index 42d401d43911..c8bfea3b7db2 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
@@ -25,31 +25,49 @@
#define DRV_LICENSE "GPL v2"
#define VDPASIM_BLK_FEATURES (VDPASIM_FEATURES | \
+ (1ULL << VIRTIO_BLK_F_FLUSH) | \
(1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
(1ULL << VIRTIO_BLK_F_SEG_MAX) | \
(1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
(1ULL << VIRTIO_BLK_F_TOPOLOGY) | \
- (1ULL << VIRTIO_BLK_F_MQ))
+ (1ULL << VIRTIO_BLK_F_MQ) | \
+ (1ULL << VIRTIO_BLK_F_DISCARD) | \
+ (1ULL << VIRTIO_BLK_F_WRITE_ZEROES))
#define VDPASIM_BLK_CAPACITY 0x40000
#define VDPASIM_BLK_SIZE_MAX 0x1000
#define VDPASIM_BLK_SEG_MAX 32
+#define VDPASIM_BLK_DWZ_MAX_SECTORS UINT_MAX
+
+/* 1 virtqueue, 1 address space, 1 virtqueue group */
#define VDPASIM_BLK_VQ_NUM 1
+#define VDPASIM_BLK_AS_NUM 1
+#define VDPASIM_BLK_GROUP_NUM 1
static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim";
-static bool vdpasim_blk_check_range(u64 start_sector, size_t range_size)
+static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector,
+ u64 num_sectors, u64 max_sectors)
{
- u64 range_sectors = range_size >> SECTOR_SHIFT;
-
- if (range_size > VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)
- return false;
+ if (start_sector > VDPASIM_BLK_CAPACITY) {
+ dev_dbg(&vdpasim->vdpa.dev,
+ "starting sector exceeds the capacity - start: 0x%llx capacity: 0x%x\n",
+ start_sector, VDPASIM_BLK_CAPACITY);
+ }
- if (start_sector > VDPASIM_BLK_CAPACITY)
+ if (num_sectors > max_sectors) {
+ dev_dbg(&vdpasim->vdpa.dev,
+ "number of sectors exceeds the max allowed in a request - num: 0x%llx max: 0x%llx\n",
+ num_sectors, max_sectors);
return false;
+ }
- if (range_sectors > VDPASIM_BLK_CAPACITY - start_sector)
+ if (num_sectors > VDPASIM_BLK_CAPACITY - start_sector) {
+ dev_dbg(&vdpasim->vdpa.dev,
+ "request exceeds the capacity - start: 0x%llx num: 0x%llx capacity: 0x%x\n",
+ start_sector, num_sectors, VDPASIM_BLK_CAPACITY);
return false;
+ }
return true;
}
@@ -63,6 +81,7 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
{
size_t pushed = 0, to_pull, to_push;
struct virtio_blk_outhdr hdr;
+ bool handled = false;
ssize_t bytes;
loff_t offset;
u64 sector;
@@ -76,14 +95,14 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
return false;
if (vq->out_iov.used < 1 || vq->in_iov.used < 1) {
- dev_err(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n",
+ dev_dbg(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n",
vq->out_iov.used, vq->in_iov.used);
- return false;
+ goto err;
}
if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) {
- dev_err(&vdpasim->vdpa.dev, "request in header too short\n");
- return false;
+ dev_dbg(&vdpasim->vdpa.dev, "request in header too short\n");
+ goto err;
}
/* The last byte is the status and we checked if the last iov has
@@ -96,8 +115,8 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr,
sizeof(hdr));
if (bytes != sizeof(hdr)) {
- dev_err(&vdpasim->vdpa.dev, "request out header too short\n");
- return false;
+ dev_dbg(&vdpasim->vdpa.dev, "request out header too short\n");
+ goto err;
}
to_pull -= bytes;
@@ -107,12 +126,20 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
offset = sector << SECTOR_SHIFT;
status = VIRTIO_BLK_S_OK;
+ if (type != VIRTIO_BLK_T_IN && type != VIRTIO_BLK_T_OUT &&
+ sector != 0) {
+ dev_dbg(&vdpasim->vdpa.dev,
+ "sector must be 0 for %u request - sector: 0x%llx\n",
+ type, sector);
+ status = VIRTIO_BLK_S_IOERR;
+ goto err_status;
+ }
+
switch (type) {
case VIRTIO_BLK_T_IN:
- if (!vdpasim_blk_check_range(sector, to_push)) {
- dev_err(&vdpasim->vdpa.dev,
- "reading over the capacity - offset: 0x%llx len: 0x%zx\n",
- offset, to_push);
+ if (!vdpasim_blk_check_range(vdpasim, sector,
+ to_push >> SECTOR_SHIFT,
+ VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
status = VIRTIO_BLK_S_IOERR;
break;
}
@@ -121,7 +148,7 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
vdpasim->buffer + offset,
to_push);
if (bytes < 0) {
- dev_err(&vdpasim->vdpa.dev,
+ dev_dbg(&vdpasim->vdpa.dev,
"vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
bytes, offset, to_push);
status = VIRTIO_BLK_S_IOERR;
@@ -132,10 +159,9 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
break;
case VIRTIO_BLK_T_OUT:
- if (!vdpasim_blk_check_range(sector, to_pull)) {
- dev_err(&vdpasim->vdpa.dev,
- "writing over the capacity - offset: 0x%llx len: 0x%zx\n",
- offset, to_pull);
+ if (!vdpasim_blk_check_range(vdpasim, sector,
+ to_pull >> SECTOR_SHIFT,
+ VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
status = VIRTIO_BLK_S_IOERR;
break;
}
@@ -144,7 +170,7 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
vdpasim->buffer + offset,
to_pull);
if (bytes < 0) {
- dev_err(&vdpasim->vdpa.dev,
+ dev_dbg(&vdpasim->vdpa.dev,
"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
bytes, offset, to_pull);
status = VIRTIO_BLK_S_IOERR;
@@ -157,7 +183,7 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
vdpasim_blk_id,
VIRTIO_BLK_ID_BYTES);
if (bytes < 0) {
- dev_err(&vdpasim->vdpa.dev,
+ dev_dbg(&vdpasim->vdpa.dev,
"vringh_iov_push_iotlb() error: %zd\n", bytes);
status = VIRTIO_BLK_S_IOERR;
break;
@@ -166,13 +192,76 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
pushed += bytes;
break;
+ case VIRTIO_BLK_T_FLUSH:
+ /* nothing to do */
+ break;
+
+ case VIRTIO_BLK_T_DISCARD:
+ case VIRTIO_BLK_T_WRITE_ZEROES: {
+ struct virtio_blk_discard_write_zeroes range;
+ u32 num_sectors, flags;
+
+ if (to_pull != sizeof(range)) {
+ dev_dbg(&vdpasim->vdpa.dev,
+ "discard/write_zeroes header len: 0x%zx [expected: 0x%zx]\n",
+ to_pull, sizeof(range));
+ status = VIRTIO_BLK_S_IOERR;
+ break;
+ }
+
+ bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &range,
+ to_pull);
+ if (bytes < 0) {
+ dev_dbg(&vdpasim->vdpa.dev,
+ "vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
+ bytes, offset, to_pull);
+ status = VIRTIO_BLK_S_IOERR;
+ break;
+ }
+
+ sector = le64_to_cpu(range.sector);
+ offset = sector << SECTOR_SHIFT;
+ num_sectors = le32_to_cpu(range.num_sectors);
+ flags = le32_to_cpu(range.flags);
+
+ if (type == VIRTIO_BLK_T_DISCARD && flags != 0) {
+ dev_dbg(&vdpasim->vdpa.dev,
+ "discard unexpected flags set - flags: 0x%x\n",
+ flags);
+ status = VIRTIO_BLK_S_UNSUPP;
+ break;
+ }
+
+ if (type == VIRTIO_BLK_T_WRITE_ZEROES &&
+ flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
+ dev_dbg(&vdpasim->vdpa.dev,
+ "write_zeroes unexpected flags set - flags: 0x%x\n",
+ flags);
+ status = VIRTIO_BLK_S_UNSUPP;
+ break;
+ }
+
+ if (!vdpasim_blk_check_range(vdpasim, sector, num_sectors,
+ VDPASIM_BLK_DWZ_MAX_SECTORS)) {
+ status = VIRTIO_BLK_S_IOERR;
+ break;
+ }
+
+ if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
+ memset(vdpasim->buffer + offset, 0,
+ num_sectors << SECTOR_SHIFT);
+ }
+
+ break;
+ }
default:
- dev_warn(&vdpasim->vdpa.dev,
- "Unsupported request type %d\n", type);
+ dev_dbg(&vdpasim->vdpa.dev,
+ "Unsupported request type %d\n", type);
status = VIRTIO_BLK_S_IOERR;
break;
}
+err_status:
/* If some operations fail, we need to skip the remaining bytes
* to put the status in the last byte
*/
@@ -182,21 +271,25 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
/* Last byte is the status */
bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1);
if (bytes != 1)
- return false;
+ goto err;
pushed += bytes;
/* Make sure data is wrote before advancing index */
smp_wmb();
+ handled = true;
+
+err:
vringh_complete_iotlb(&vq->vring, vq->head, pushed);
- return true;
+ return handled;
}
static void vdpasim_blk_work(struct work_struct *work)
{
struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
+ bool reschedule = false;
int i;
spin_lock(&vdpasim->lock);
@@ -204,8 +297,12 @@ static void vdpasim_blk_work(struct work_struct *work)
if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
goto out;
+ if (!vdpasim->running)
+ goto out;
+
for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) {
struct vdpasim_virtqueue *vq = &vdpasim->vqs[i];
+ int reqs = 0;
if (!vq->ready)
continue;
@@ -218,10 +315,18 @@ static void vdpasim_blk_work(struct work_struct *work)
if (vringh_need_notify_iotlb(&vq->vring) > 0)
vringh_notify(&vq->vring);
local_bh_enable();
+
+ if (++reqs > 4) {
+ reschedule = true;
+ break;
+ }
}
}
out:
spin_unlock(&vdpasim->lock);
+
+ if (reschedule)
+ schedule_work(&vdpasim->work);
}
static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
@@ -237,6 +342,17 @@ static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1);
blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1);
blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
+ /* VIRTIO_BLK_F_DISCARD */
+ blk_config->discard_sector_alignment =
+ cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
+ blk_config->max_discard_sectors =
+ cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
+ blk_config->max_discard_seg = cpu_to_vdpasim32(vdpasim, 1);
+ /* VIRTIO_BLK_F_WRITE_ZEROES */
+ blk_config->max_write_zeroes_sectors =
+ cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
+ blk_config->max_write_zeroes_seg = cpu_to_vdpasim32(vdpasim, 1);
+
}
static void vdpasim_blk_mgmtdev_release(struct device *dev)
@@ -260,6 +376,8 @@ static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
dev_attr.id = VIRTIO_ID_BLOCK;
dev_attr.supported_features = VDPASIM_BLK_FEATURES;
dev_attr.nvqs = VDPASIM_BLK_VQ_NUM;
+ dev_attr.ngroups = VDPASIM_BLK_GROUP_NUM;
+ dev_attr.nas = VDPASIM_BLK_AS_NUM;
dev_attr.config_size = sizeof(struct virtio_blk_config);
dev_attr.get_config = vdpasim_blk_get_config;
dev_attr.work_fn = vdpasim_blk_work;
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
index 5125976a4df8..886449e88502 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
@@ -154,6 +154,9 @@ static void vdpasim_net_work(struct work_struct *work)
spin_lock(&vdpasim->lock);
+ if (!vdpasim->running)
+ goto out;
+
if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
goto out;
diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c
index 6daa3978d290..e682bc7ee6c9 100644
--- a/drivers/vdpa/vdpa_user/iova_domain.c
+++ b/drivers/vdpa/vdpa_user/iova_domain.c
@@ -138,18 +138,17 @@ static void do_bounce(phys_addr_t orig, void *addr, size_t size,
{
unsigned long pfn = PFN_DOWN(orig);
unsigned int offset = offset_in_page(orig);
- char *buffer;
+ struct page *page;
unsigned int sz = 0;
while (size) {
sz = min_t(size_t, PAGE_SIZE - offset, size);
- buffer = kmap_atomic(pfn_to_page(pfn));
+ page = pfn_to_page(pfn);
if (dir == DMA_TO_DEVICE)
- memcpy(addr, buffer + offset, sz);
+ memcpy_from_page(addr, page, offset, sz);
else
- memcpy(buffer + offset, addr, sz);
- kunmap_atomic(buffer);
+ memcpy_to_page(page, offset, addr, sz);
size -= sz;
pfn++;
@@ -179,8 +178,9 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain,
map->orig_phys == INVALID_PHYS_ADDR))
return;
- addr = page_address(map->bounce_page) + offset;
- do_bounce(map->orig_phys + offset, addr, sz, dir);
+ addr = kmap_local_page(map->bounce_page);
+ do_bounce(map->orig_phys + offset, addr + offset, sz, dir);
+ kunmap_local(addr);
size -= sz;
iova += sz;
}
@@ -213,21 +213,21 @@ vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
struct vduse_bounce_map *map;
struct page *page = NULL;
- spin_lock(&domain->iotlb_lock);
+ read_lock(&domain->bounce_lock);
map = &domain->bounce_maps[iova >> PAGE_SHIFT];
- if (!map->bounce_page)
+ if (domain->user_bounce_pages || !map->bounce_page)
goto out;
page = map->bounce_page;
get_page(page);
out:
- spin_unlock(&domain->iotlb_lock);
+ read_unlock(&domain->bounce_lock);
return page;
}
static void
-vduse_domain_free_bounce_pages(struct vduse_iova_domain *domain)
+vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
{
struct vduse_bounce_map *map;
unsigned long pfn, bounce_pfns;
@@ -247,6 +247,73 @@ vduse_domain_free_bounce_pages(struct vduse_iova_domain *domain)
}
}
+int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
+ struct page **pages, int count)
+{
+ struct vduse_bounce_map *map;
+ int i, ret;
+
+ /* Now we don't support partial mapping */
+ if (count != (domain->bounce_size >> PAGE_SHIFT))
+ return -EINVAL;
+
+ write_lock(&domain->bounce_lock);
+ ret = -EEXIST;
+ if (domain->user_bounce_pages)
+ goto out;
+
+ for (i = 0; i < count; i++) {
+ map = &domain->bounce_maps[i];
+ if (map->bounce_page) {
+ /* Copy kernel page to user page if it's in use */
+ if (map->orig_phys != INVALID_PHYS_ADDR)
+ memcpy_to_page(pages[i], 0,
+ page_address(map->bounce_page),
+ PAGE_SIZE);
+ __free_page(map->bounce_page);
+ }
+ map->bounce_page = pages[i];
+ get_page(pages[i]);
+ }
+ domain->user_bounce_pages = true;
+ ret = 0;
+out:
+ write_unlock(&domain->bounce_lock);
+
+ return ret;
+}
+
+void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
+{
+ struct vduse_bounce_map *map;
+ unsigned long i, count;
+
+ write_lock(&domain->bounce_lock);
+ if (!domain->user_bounce_pages)
+ goto out;
+
+ count = domain->bounce_size >> PAGE_SHIFT;
+ for (i = 0; i < count; i++) {
+ struct page *page = NULL;
+
+ map = &domain->bounce_maps[i];
+ if (WARN_ON(!map->bounce_page))
+ continue;
+
+ /* Copy user page to kernel page if it's in use */
+ if (map->orig_phys != INVALID_PHYS_ADDR) {
+ page = alloc_page(GFP_ATOMIC | __GFP_NOFAIL);
+ memcpy_from_page(page_address(page),
+ map->bounce_page, 0, PAGE_SIZE);
+ }
+ put_page(map->bounce_page);
+ map->bounce_page = page;
+ }
+ domain->user_bounce_pages = false;
+out:
+ write_unlock(&domain->bounce_lock);
+}
+
void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
{
if (!domain->bounce_map)
@@ -322,13 +389,18 @@ dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
if (vduse_domain_init_bounce_map(domain))
goto err;
+ read_lock(&domain->bounce_lock);
if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa))
- goto err;
+ goto err_unlock;
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);
+ read_unlock(&domain->bounce_lock);
+
return iova;
+err_unlock:
+ read_unlock(&domain->bounce_lock);
err:
vduse_domain_free_iova(iovad, iova, size);
return DMA_MAPPING_ERROR;
@@ -340,10 +412,12 @@ void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
{
struct iova_domain *iovad = &domain->stream_iovad;
+ read_lock(&domain->bounce_lock);
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);
vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
+ read_unlock(&domain->bounce_lock);
vduse_domain_free_iova(iovad, dma_addr, size);
}
@@ -451,7 +525,8 @@ static int vduse_domain_release(struct inode *inode, struct file *file)
spin_lock(&domain->iotlb_lock);
vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
- vduse_domain_free_bounce_pages(domain);
+ vduse_domain_remove_user_bounce_pages(domain);
+ vduse_domain_free_kernel_bounce_pages(domain);
spin_unlock(&domain->iotlb_lock);
put_iova_domain(&domain->stream_iovad);
put_iova_domain(&domain->consistent_iovad);
@@ -511,6 +586,7 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
goto err_file;
domain->file = file;
+ rwlock_init(&domain->bounce_lock);
spin_lock_init(&domain->iotlb_lock);
init_iova_domain(&domain->stream_iovad,
PAGE_SIZE, IOVA_START_PFN);
diff --git a/drivers/vdpa/vdpa_user/iova_domain.h b/drivers/vdpa/vdpa_user/iova_domain.h
index 2722d9b8e21a..4e0e50e7ac15 100644
--- a/drivers/vdpa/vdpa_user/iova_domain.h
+++ b/drivers/vdpa/vdpa_user/iova_domain.h
@@ -14,6 +14,7 @@
#include <linux/iova.h>
#include <linux/dma-mapping.h>
#include <linux/vhost_iotlb.h>
+#include <linux/rwlock.h>
#define IOVA_START_PFN 1
@@ -34,6 +35,8 @@ struct vduse_iova_domain {
struct vhost_iotlb *iotlb;
spinlock_t iotlb_lock;
struct file *file;
+ bool user_bounce_pages;
+ rwlock_t bounce_lock;
};
int vduse_domain_set_map(struct vduse_iova_domain *domain,
@@ -61,6 +64,11 @@ void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain);
+int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
+ struct page **pages, int count);
+
+void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain);
+
void vduse_domain_destroy(struct vduse_iova_domain *domain);
struct vduse_iova_domain *vduse_domain_create(unsigned long iova_limit,
diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index 3bc27de58f46..41c0b29739f1 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -21,6 +21,8 @@
#include <linux/uio.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
+#include <linux/vmalloc.h>
+#include <linux/sched/mm.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
@@ -64,6 +66,13 @@ struct vduse_vdpa {
struct vduse_dev *dev;
};
+struct vduse_umem {
+ unsigned long iova;
+ unsigned long npages;
+ struct page **pages;
+ struct mm_struct *mm;
+};
+
struct vduse_dev {
struct vduse_vdpa *vdev;
struct device *dev;
@@ -95,6 +104,8 @@ struct vduse_dev {
u8 status;
u32 vq_num;
u32 vq_align;
+ struct vduse_umem *umem;
+ struct mutex mem_lock;
};
struct vduse_dev_msg {
@@ -917,6 +928,102 @@ unlock:
return ret;
}
+static int vduse_dev_dereg_umem(struct vduse_dev *dev,
+ u64 iova, u64 size)
+{
+ int ret;
+
+ mutex_lock(&dev->mem_lock);
+ ret = -ENOENT;
+ if (!dev->umem)
+ goto unlock;
+
+ ret = -EINVAL;
+ if (dev->umem->iova != iova || size != dev->domain->bounce_size)
+ goto unlock;
+
+ vduse_domain_remove_user_bounce_pages(dev->domain);
+ unpin_user_pages_dirty_lock(dev->umem->pages,
+ dev->umem->npages, true);
+ atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
+ mmdrop(dev->umem->mm);
+ vfree(dev->umem->pages);
+ kfree(dev->umem);
+ dev->umem = NULL;
+ ret = 0;
+unlock:
+ mutex_unlock(&dev->mem_lock);
+ return ret;
+}
+
+static int vduse_dev_reg_umem(struct vduse_dev *dev,
+ u64 iova, u64 uaddr, u64 size)
+{
+ struct page **page_list = NULL;
+ struct vduse_umem *umem = NULL;
+ long pinned = 0;
+ unsigned long npages, lock_limit;
+ int ret;
+
+ if (!dev->domain->bounce_map ||
+ size != dev->domain->bounce_size ||
+ iova != 0 || uaddr & ~PAGE_MASK)
+ return -EINVAL;
+
+ mutex_lock(&dev->mem_lock);
+ ret = -EEXIST;
+ if (dev->umem)
+ goto unlock;
+
+ ret = -ENOMEM;
+ npages = size >> PAGE_SHIFT;
+ page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
+ GFP_KERNEL_ACCOUNT);
+ umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+ if (!page_list || !umem)
+ goto unlock;
+
+ mmap_read_lock(current->mm);
+
+ lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
+ if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
+ goto out;
+
+ pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
+ page_list, NULL);
+ if (pinned != npages) {
+ ret = pinned < 0 ? pinned : -ENOMEM;
+ goto out;
+ }
+
+ ret = vduse_domain_add_user_bounce_pages(dev->domain,
+ page_list, pinned);
+ if (ret)
+ goto out;
+
+ atomic64_add(npages, &current->mm->pinned_vm);
+
+ umem->pages = page_list;
+ umem->npages = pinned;
+ umem->iova = iova;
+ umem->mm = current->mm;
+ mmgrab(current->mm);
+
+ dev->umem = umem;
+out:
+ if (ret && pinned > 0)
+ unpin_user_pages(page_list, pinned);
+
+ mmap_read_unlock(current->mm);
+unlock:
+ if (ret) {
+ vfree(page_list);
+ kfree(umem);
+ }
+ mutex_unlock(&dev->mem_lock);
+ return ret;
+}
+
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
@@ -1089,6 +1196,77 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
break;
}
+ case VDUSE_IOTLB_REG_UMEM: {
+ struct vduse_iova_umem umem;
+
+ ret = -EFAULT;
+ if (copy_from_user(&umem, argp, sizeof(umem)))
+ break;
+
+ ret = -EINVAL;
+ if (!is_mem_zero((const char *)umem.reserved,
+ sizeof(umem.reserved)))
+ break;
+
+ ret = vduse_dev_reg_umem(dev, umem.iova,
+ umem.uaddr, umem.size);
+ break;
+ }
+ case VDUSE_IOTLB_DEREG_UMEM: {
+ struct vduse_iova_umem umem;
+
+ ret = -EFAULT;
+ if (copy_from_user(&umem, argp, sizeof(umem)))
+ break;
+
+ ret = -EINVAL;
+ if (!is_mem_zero((const char *)umem.reserved,
+ sizeof(umem.reserved)))
+ break;
+
+ ret = vduse_dev_dereg_umem(dev, umem.iova,
+ umem.size);
+ break;
+ }
+ case VDUSE_IOTLB_GET_INFO: {
+ struct vduse_iova_info info;
+ struct vhost_iotlb_map *map;
+ struct vduse_iova_domain *domain = dev->domain;
+
+ ret = -EFAULT;
+ if (copy_from_user(&info, argp, sizeof(info)))
+ break;
+
+ ret = -EINVAL;
+ if (info.start > info.last)
+ break;
+
+ if (!is_mem_zero((const char *)info.reserved,
+ sizeof(info.reserved)))
+ break;
+
+ spin_lock(&domain->iotlb_lock);
+ map = vhost_iotlb_itree_first(domain->iotlb,
+ info.start, info.last);
+ if (map) {
+ info.start = map->start;
+ info.last = map->last;
+ info.capability = 0;
+ if (domain->bounce_map && map->start == 0 &&
+ map->last == domain->bounce_size - 1)
+ info.capability |= VDUSE_IOVA_CAP_UMEM;
+ }
+ spin_unlock(&domain->iotlb_lock);
+ if (!map)
+ break;
+
+ ret = -EFAULT;
+ if (copy_to_user(argp, &info, sizeof(info)))
+ break;
+
+ ret = 0;
+ break;
+ }
default:
ret = -ENOIOCTLCMD;
break;
@@ -1101,6 +1279,7 @@ static int vduse_dev_release(struct inode *inode, struct file *file)
{
struct vduse_dev *dev = file->private_data;
+ vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
spin_lock(&dev->msg_lock);
/* Make sure the inflight messages can processed after reconncection */
list_splice_init(&dev->recv_list, &dev->send_list);
@@ -1163,6 +1342,7 @@ static struct vduse_dev *vduse_dev_create(void)
return NULL;
mutex_init(&dev->lock);
+ mutex_init(&dev->mem_lock);
spin_lock_init(&dev->msg_lock);
INIT_LIST_HEAD(&dev->send_list);
INIT_LIST_HEAD(&dev->recv_list);