summaryrefslogtreecommitdiff
path: root/drivers/vdpa/mlx5
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vdpa/mlx5')
-rw-r--r--drivers/vdpa/mlx5/Makefile2
-rw-r--r--drivers/vdpa/mlx5/core/mlx5_vdpa.h79
-rw-r--r--drivers/vdpa/mlx5/core/mr.c533
-rw-r--r--drivers/vdpa/mlx5/core/resources.c79
-rw-r--r--drivers/vdpa/mlx5/net/debug.c153
-rw-r--r--drivers/vdpa/mlx5/net/mlx5_vnet.c1734
-rw-r--r--drivers/vdpa/mlx5/net/mlx5_vnet.h119
7 files changed, 2148 insertions, 551 deletions
diff --git a/drivers/vdpa/mlx5/Makefile b/drivers/vdpa/mlx5/Makefile
index f717978c83bf..e791394c33e3 100644
--- a/drivers/vdpa/mlx5/Makefile
+++ b/drivers/vdpa/mlx5/Makefile
@@ -1,4 +1,4 @@
subdir-ccflags-y += -I$(srctree)/drivers/vdpa/mlx5/core
obj-$(CONFIG_MLX5_VDPA_NET) += mlx5_vdpa.o
-mlx5_vdpa-$(CONFIG_MLX5_VDPA_NET) += net/mlx5_vnet.o core/resources.o core/mr.o
+mlx5_vdpa-$(CONFIG_MLX5_VDPA_NET) += net/mlx5_vnet.o core/resources.o core/mr.o net/debug.o
diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index 058fbe28107e..2cedf7e2dbc4 100644
--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
@@ -31,11 +31,13 @@ struct mlx5_vdpa_mr {
struct list_head head;
unsigned long num_directs;
unsigned long num_klms;
- bool initialized;
- /* serialize mkey creation and destruction */
- struct mutex mkey_mtx;
+ struct vhost_iotlb *iotlb;
+
bool user_mr;
+
+ refcount_t refcount;
+ struct list_head mr_list;
};
struct mlx5_vdpa_resources {
@@ -73,17 +75,36 @@ struct mlx5_vdpa_wq_ent {
enum {
MLX5_VDPA_DATAVQ_GROUP,
MLX5_VDPA_CVQ_GROUP,
+ MLX5_VDPA_DATAVQ_DESC_GROUP,
MLX5_VDPA_NUMVQ_GROUPS
};
enum {
- MLX5_VDPA_NUM_AS = MLX5_VDPA_NUMVQ_GROUPS
+ MLX5_VDPA_NUM_AS = 2
+};
+
+struct mlx5_vdpa_mr_resources {
+ struct mlx5_vdpa_mr *mr[MLX5_VDPA_NUM_AS];
+ unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS];
+
+ /* Pre-deletion mr list */
+ struct list_head mr_list_head;
+
+ /* Deferred mr list */
+ struct list_head mr_gc_list_head;
+ struct workqueue_struct *wq_gc;
+ struct delayed_work gc_dwork_ent;
+
+ struct mutex lock;
+
+ atomic_t shutdown;
};
struct mlx5_vdpa_dev {
struct vdpa_device vdev;
struct mlx5_core_dev *mdev;
struct mlx5_vdpa_resources res;
+ struct mlx5_vdpa_mr_resources mres;
u64 mlx_features;
u64 actual_features;
@@ -92,15 +113,25 @@ struct mlx5_vdpa_dev {
u16 max_idx;
u32 generation;
- struct mlx5_vdpa_mr mr;
struct mlx5_control_vq cvq;
struct workqueue_struct *wq;
- unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS];
+ bool suspended;
+
+ struct mlx5_async_ctx async_ctx;
+};
+
+struct mlx5_vdpa_async_cmd {
+ int err;
+ struct mlx5_async_work cb_work;
+ struct completion cmd_done;
+
+ void *in;
+ size_t inlen;
+
+ void *out;
+ size_t outlen;
};
-int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
-int mlx5_vdpa_dealloc_pd(struct mlx5_vdpa_dev *dev, u32 pdn, u16 uid);
-int mlx5_vdpa_get_null_mkey(struct mlx5_vdpa_dev *dev, u32 *null_mkey);
int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn);
void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn);
int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn);
@@ -115,11 +146,31 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev);
int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in,
int inlen);
int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey);
-int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
- bool *change_map, unsigned int asid);
-int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
- unsigned int asid);
-void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev);
+struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
+ struct vhost_iotlb *iotlb);
+int mlx5_vdpa_init_mr_resources(struct mlx5_vdpa_dev *mvdev);
+void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev);
+void mlx5_vdpa_clean_mrs(struct mlx5_vdpa_dev *mvdev);
+void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_mr *mr);
+void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_mr *mr);
+void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_mr *mr,
+ unsigned int asid);
+int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev,
+ struct vhost_iotlb *iotlb,
+ unsigned int asid);
+int mlx5_vdpa_create_dma_mr(struct mlx5_vdpa_dev *mvdev);
+int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid);
+int mlx5_vdpa_exec_async_cmds(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_async_cmd *cmds,
+ int num_cmds);
+
+#define mlx5_vdpa_err(__dev, format, ...) \
+ dev_err((__dev)->mdev->device, "%s:%d:(pid %d) error: " format, __func__, __LINE__, \
+ current->pid, ##__VA_ARGS__)
+
#define mlx5_vdpa_warn(__dev, format, ...) \
dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__, \
diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
index 0a1e0b0dc37e..8870a7169267 100644
--- a/drivers/vdpa/mlx5/core/mr.c
+++ b/drivers/vdpa/mlx5/core/mr.c
@@ -49,17 +49,23 @@ static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
}
}
-static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
+struct mlx5_create_mkey_mem {
+ u8 out[MLX5_ST_SZ_BYTES(create_mkey_out)];
+ u8 in[MLX5_ST_SZ_BYTES(create_mkey_in)];
+ __be64 mtt[];
+};
+
+struct mlx5_destroy_mkey_mem {
+ u8 out[MLX5_ST_SZ_BYTES(destroy_mkey_out)];
+ u8 in[MLX5_ST_SZ_BYTES(destroy_mkey_in)];
+};
+
+static void fill_create_direct_mr(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_direct_mr *mr,
+ struct mlx5_create_mkey_mem *mem)
{
- int inlen;
+ void *in = &mem->in;
void *mkc;
- void *in;
- int err;
-
- inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16);
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
@@ -76,18 +82,36 @@ static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct
MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
get_octo_len(mr->end - mr->start, mr->log_size));
populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));
- err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen);
- kvfree(in);
- if (err) {
- mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n");
- return err;
- }
- return 0;
+ MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
+ MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
+}
+
+static void create_direct_mr_end(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_direct_mr *mr,
+ struct mlx5_create_mkey_mem *mem)
+{
+ u32 mkey_index = MLX5_GET(create_mkey_out, mem->out, mkey_index);
+
+ mr->mr = mlx5_idx_to_mkey(mkey_index);
+}
+
+static void fill_destroy_direct_mr(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_direct_mr *mr,
+ struct mlx5_destroy_mkey_mem *mem)
+{
+ void *in = &mem->in;
+
+ MLX5_SET(destroy_mkey_in, in, uid, mvdev->res.uid);
+ MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
+ MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mr->mr));
}
static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
+ if (!mr->mr)
+ return;
+
mlx5_vdpa_destroy_mkey(mvdev, mr->mr);
}
@@ -166,9 +190,12 @@ again:
klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start));
preve = dmr->end;
} else {
+ u64 bcount = min_t(u64, dmr->start - preve, MAX_KLM_SIZE);
+
klm->key = cpu_to_be32(mvdev->res.null_mkey);
- klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve));
- preve = dmr->start;
+ klm->bcount = cpu_to_be32(klm_bcount(bcount));
+ preve += bcount;
+
goto again;
}
}
@@ -179,6 +206,123 @@ static int klm_byte_size(int nklms)
return 16 * ALIGN(nklms, 4);
}
+#define MLX5_VDPA_MTT_ALIGN 16
+
+static int create_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
+{
+ struct mlx5_vdpa_async_cmd *cmds;
+ struct mlx5_vdpa_direct_mr *dmr;
+ int err = 0;
+ int i = 0;
+
+ cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL);
+ if (!cmds)
+ return -ENOMEM;
+
+ list_for_each_entry(dmr, &mr->head, list) {
+ struct mlx5_create_mkey_mem *cmd_mem;
+ int mttlen, mttcount;
+
+ mttlen = roundup(MLX5_ST_SZ_BYTES(mtt) * dmr->nsg, MLX5_VDPA_MTT_ALIGN);
+ mttcount = mttlen / sizeof(cmd_mem->mtt[0]);
+ cmd_mem = kvcalloc(1, struct_size(cmd_mem, mtt, mttcount), GFP_KERNEL);
+ if (!cmd_mem) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ cmds[i].out = cmd_mem->out;
+ cmds[i].outlen = sizeof(cmd_mem->out);
+ cmds[i].in = cmd_mem->in;
+ cmds[i].inlen = struct_size(cmd_mem, mtt, mttcount);
+
+ fill_create_direct_mr(mvdev, dmr, cmd_mem);
+
+ i++;
+ }
+
+ err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs);
+ if (err) {
+
+ mlx5_vdpa_err(mvdev, "error issuing MTT mkey creation for direct mrs: %d\n", err);
+ goto done;
+ }
+
+ i = 0;
+ list_for_each_entry(dmr, &mr->head, list) {
+ struct mlx5_vdpa_async_cmd *cmd = &cmds[i++];
+ struct mlx5_create_mkey_mem *cmd_mem;
+
+ cmd_mem = container_of(cmd->out, struct mlx5_create_mkey_mem, out);
+
+ if (!cmd->err) {
+ create_direct_mr_end(mvdev, dmr, cmd_mem);
+ } else {
+ err = err ? err : cmd->err;
+ mlx5_vdpa_err(mvdev, "error creating MTT mkey [0x%llx, 0x%llx]: %d\n",
+ dmr->start, dmr->end, cmd->err);
+ }
+ }
+
+done:
+ for (i = i-1; i >= 0; i--) {
+ struct mlx5_create_mkey_mem *cmd_mem;
+
+ cmd_mem = container_of(cmds[i].out, struct mlx5_create_mkey_mem, out);
+ kvfree(cmd_mem);
+ }
+
+ kvfree(cmds);
+ return err;
+}
+
+DEFINE_FREE(free_cmds, struct mlx5_vdpa_async_cmd *, kvfree(_T))
+DEFINE_FREE(free_cmd_mem, struct mlx5_destroy_mkey_mem *, kvfree(_T))
+
+static int destroy_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
+{
+ struct mlx5_destroy_mkey_mem *cmd_mem __free(free_cmd_mem) = NULL;
+ struct mlx5_vdpa_async_cmd *cmds __free(free_cmds) = NULL;
+ struct mlx5_vdpa_direct_mr *dmr;
+ int err = 0;
+ int i = 0;
+
+ cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL);
+ cmd_mem = kvcalloc(mr->num_directs, sizeof(*cmd_mem), GFP_KERNEL);
+ if (!cmds || !cmd_mem)
+ return -ENOMEM;
+
+ list_for_each_entry(dmr, &mr->head, list) {
+ cmds[i].out = cmd_mem[i].out;
+ cmds[i].outlen = sizeof(cmd_mem[i].out);
+ cmds[i].in = cmd_mem[i].in;
+ cmds[i].inlen = sizeof(cmd_mem[i].in);
+ fill_destroy_direct_mr(mvdev, dmr, &cmd_mem[i]);
+ i++;
+ }
+
+ err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs);
+ if (err) {
+
+ mlx5_vdpa_err(mvdev, "error issuing MTT mkey deletion for direct mrs: %d\n", err);
+ return err;
+ }
+
+ i = 0;
+ list_for_each_entry(dmr, &mr->head, list) {
+ struct mlx5_vdpa_async_cmd *cmd = &cmds[i++];
+
+ dmr->mr = 0;
+ if (cmd->err) {
+ err = err ? err : cmd->err;
+ mlx5_vdpa_err(mvdev, "error deleting MTT mkey [0x%llx, 0x%llx]: %d\n",
+ dmr->start, dmr->end, cmd->err);
+ }
+ }
+
+ return err;
+}
+
static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
int inlen;
@@ -227,21 +371,19 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
unsigned long lgcd = 0;
int log_entity_size;
unsigned long size;
- u64 start = 0;
int err;
struct page *pg;
unsigned int nsg;
int sglen;
- u64 pa;
+ u64 pa, offset;
u64 paend;
struct scatterlist *sg;
- struct device *dma = mvdev->vdev.dma_dev;
+ struct device *dma = mvdev->vdev.vmap.dma_dev;
for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
- map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
+ map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
size = maplen(map, mr);
lgcd = gcd(lgcd, size);
- start += size;
}
log_entity_size = ilog2(lgcd);
@@ -255,8 +397,10 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
sg = mr->sg_head.sgl;
for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
- paend = map->addr + maplen(map, mr);
- for (pa = map->addr; pa < paend; pa += sglen) {
+ offset = mr->start > map->start ? mr->start - map->start : 0;
+ pa = map->addr + offset;
+ paend = map->addr + offset + maplen(map, mr);
+ for (; pa < paend; pa += sglen) {
pg = pfn_to_page(__phys_to_pfn(pa));
if (!sg) {
mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n",
@@ -279,14 +423,8 @@ done:
goto err_map;
}
- err = create_direct_mr(mvdev, mr);
- if (err)
- goto err_direct;
-
return 0;
-err_direct:
- dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
err_map:
sg_free_table(&mr->sg_head);
return err;
@@ -294,17 +432,20 @@ err_map:
static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
- struct device *dma = mvdev->vdev.dma_dev;
+ struct device *dma = mvdev->vdev.vmap.dma_dev;
destroy_direct_mr(mvdev, mr);
dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
sg_free_table(&mr->sg_head);
}
-static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 perm,
+static int add_direct_chain(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_mr *mr,
+ u64 start,
+ u64 size,
+ u8 perm,
struct vhost_iotlb *iotlb)
{
- struct mlx5_vdpa_mr *mr = &mvdev->mr;
struct mlx5_vdpa_direct_mr *dmr;
struct mlx5_vdpa_direct_mr *n;
LIST_HEAD(tmp);
@@ -354,9 +495,10 @@ err_alloc:
* indirect memory key that provides access to the enitre address space given
* by iotlb.
*/
-static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+static int create_user_mr(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_mr *mr,
+ struct vhost_iotlb *iotlb)
{
- struct mlx5_vdpa_mr *mr = &mvdev->mr;
struct mlx5_vdpa_direct_mr *dmr;
struct mlx5_vdpa_direct_mr *n;
struct vhost_iotlb_map *map;
@@ -384,7 +526,7 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb
LOG_MAX_KLM_SIZE);
mr->num_klms += nnuls;
}
- err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
+ err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb);
if (err)
goto err_chain;
}
@@ -393,7 +535,11 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb
pperm = map->perm;
}
}
- err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
+ err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb);
+ if (err)
+ goto err_chain;
+
+ err = create_direct_keys(mvdev, mr);
if (err)
goto err_chain;
@@ -450,20 +596,23 @@ static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
mlx5_vdpa_destroy_mkey(mvdev, mr->mkey);
}
-static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src)
+static int dup_iotlb(struct vhost_iotlb *dst, struct vhost_iotlb *src)
{
struct vhost_iotlb_map *map;
u64 start = 0, last = ULLONG_MAX;
int err;
+ if (dst == src)
+ return -EINVAL;
+
if (!src) {
- err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW);
+ err = vhost_iotlb_add_range(dst, start, last, start, VHOST_ACCESS_RW);
return err;
}
for (map = vhost_iotlb_itree_first(src, start, last); map;
map = vhost_iotlb_itree_next(map, start, last)) {
- err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last,
+ err = vhost_iotlb_add_range(dst, map->start, map->last,
map->addr, map->perm);
if (err)
return err;
@@ -471,9 +620,9 @@ static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src)
return 0;
}
-static void prune_iotlb(struct mlx5_vdpa_dev *mvdev)
+static void prune_iotlb(struct vhost_iotlb *iotlb)
{
- vhost_iotlb_del_range(mvdev->cvq.iotlb, 0, ULLONG_MAX);
+ vhost_iotlb_del_range(iotlb, 0, ULLONG_MAX);
}
static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
@@ -482,6 +631,7 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
struct mlx5_vdpa_direct_mr *n;
destroy_indirect_key(mvdev, mr);
+ destroy_direct_keys(mvdev, mr);
list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
list_del_init(&dmr->list);
unmap_direct_mr(mvdev, dmr);
@@ -489,91 +639,282 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
}
}
-void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
+static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
- struct mlx5_vdpa_mr *mr = &mvdev->mr;
+ if (WARN_ON(!mr))
+ return;
- mutex_lock(&mr->mkey_mtx);
- if (!mr->initialized)
- goto out;
-
- prune_iotlb(mvdev);
if (mr->user_mr)
destroy_user_mr(mvdev, mr);
else
destroy_dma_mr(mvdev, mr);
- memset(mr, 0, sizeof(*mr));
- mr->initialized = false;
-out:
- mutex_unlock(&mr->mkey_mtx);
+ vhost_iotlb_free(mr->iotlb);
+
+ list_del(&mr->mr_list);
+
+ kfree(mr);
+}
+
+/* There can be multiple .set_map() operations in quick succession.
+ * This large delay is a simple way to prevent the MR cleanup from blocking
+ * .set_map() MR creation in this scenario.
+ */
+#define MLX5_VDPA_MR_GC_TRIGGER_MS 2000
+
+static void mlx5_vdpa_mr_gc_handler(struct work_struct *work)
+{
+ struct mlx5_vdpa_mr_resources *mres;
+ struct mlx5_vdpa_mr *mr, *tmp;
+ struct mlx5_vdpa_dev *mvdev;
+
+ mres = container_of(work, struct mlx5_vdpa_mr_resources, gc_dwork_ent.work);
+
+ if (atomic_read(&mres->shutdown)) {
+ mutex_lock(&mres->lock);
+ } else if (!mutex_trylock(&mres->lock)) {
+ queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent,
+ msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS));
+ return;
+ }
+
+ mvdev = container_of(mres, struct mlx5_vdpa_dev, mres);
+
+ list_for_each_entry_safe(mr, tmp, &mres->mr_gc_list_head, mr_list) {
+ _mlx5_vdpa_destroy_mr(mvdev, mr);
+ }
+
+ mutex_unlock(&mres->lock);
+}
+
+static void _mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_mr *mr)
+{
+ struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;
+
+ if (!mr)
+ return;
+
+ if (refcount_dec_and_test(&mr->refcount)) {
+ list_move_tail(&mr->mr_list, &mres->mr_gc_list_head);
+ queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent,
+ msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS));
+ }
+}
+
+void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_mr *mr)
+{
+ mutex_lock(&mvdev->mres.lock);
+ _mlx5_vdpa_put_mr(mvdev, mr);
+ mutex_unlock(&mvdev->mres.lock);
+}
+
+static void _mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_mr *mr)
+{
+ if (!mr)
+ return;
+
+ refcount_inc(&mr->refcount);
+}
+
+void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_mr *mr)
+{
+ mutex_lock(&mvdev->mres.lock);
+ _mlx5_vdpa_get_mr(mvdev, mr);
+ mutex_unlock(&mvdev->mres.lock);
+}
+
+void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_mr *new_mr,
+ unsigned int asid)
+{
+ struct mlx5_vdpa_mr *old_mr = mvdev->mres.mr[asid];
+
+ mutex_lock(&mvdev->mres.lock);
+
+ _mlx5_vdpa_put_mr(mvdev, old_mr);
+ mvdev->mres.mr[asid] = new_mr;
+
+ mutex_unlock(&mvdev->mres.lock);
+}
+
+static void mlx5_vdpa_show_mr_leaks(struct mlx5_vdpa_dev *mvdev)
+{
+ struct mlx5_vdpa_mr *mr;
+
+ mutex_lock(&mvdev->mres.lock);
+
+ list_for_each_entry(mr, &mvdev->mres.mr_list_head, mr_list) {
+
+ mlx5_vdpa_warn(mvdev, "mkey still alive after resource delete: "
+ "mr: %p, mkey: 0x%x, refcount: %u\n",
+ mr, mr->mkey, refcount_read(&mr->refcount));
+ }
+
+ mutex_unlock(&mvdev->mres.lock);
+
+}
+
+void mlx5_vdpa_clean_mrs(struct mlx5_vdpa_dev *mvdev)
+{
+ if (!mvdev->res.valid)
+ return;
+
+ for (int i = 0; i < MLX5_VDPA_NUM_AS; i++)
+ mlx5_vdpa_update_mr(mvdev, NULL, i);
+
+ prune_iotlb(mvdev->cvq.iotlb);
+
+ mlx5_vdpa_show_mr_leaks(mvdev);
}
static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
- struct vhost_iotlb *iotlb, unsigned int asid)
+ struct mlx5_vdpa_mr *mr,
+ struct vhost_iotlb *iotlb)
{
- struct mlx5_vdpa_mr *mr = &mvdev->mr;
int err;
- if (mr->initialized)
- return 0;
+ if (iotlb)
+ err = create_user_mr(mvdev, mr, iotlb);
+ else
+ err = create_dma_mr(mvdev, mr);
- if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
- if (iotlb)
- err = create_user_mr(mvdev, iotlb);
- else
- err = create_dma_mr(mvdev, mr);
+ if (err)
+ return err;
- if (err)
- return err;
+ mr->iotlb = vhost_iotlb_alloc(0, 0);
+ if (!mr->iotlb) {
+ err = -ENOMEM;
+ goto err_mr;
}
- if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
- err = dup_iotlb(mvdev, iotlb);
- if (err)
- goto out_err;
- }
+ err = dup_iotlb(mr->iotlb, iotlb);
+ if (err)
+ goto err_iotlb;
+
+ list_add_tail(&mr->mr_list, &mvdev->mres.mr_list_head);
- mr->initialized = true;
return 0;
-out_err:
- if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
- if (iotlb)
- destroy_user_mr(mvdev, mr);
- else
- destroy_dma_mr(mvdev, mr);
- }
+err_iotlb:
+ vhost_iotlb_free(mr->iotlb);
+
+err_mr:
+ if (iotlb)
+ destroy_user_mr(mvdev, mr);
+ else
+ destroy_dma_mr(mvdev, mr);
return err;
}
-int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
- unsigned int asid)
+struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
+ struct vhost_iotlb *iotlb)
{
+ struct mlx5_vdpa_mr *mr;
int err;
- mutex_lock(&mvdev->mr.mkey_mtx);
- err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid);
- mutex_unlock(&mvdev->mr.mkey_mtx);
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_lock(&mvdev->mres.lock);
+ err = _mlx5_vdpa_create_mr(mvdev, mr, iotlb);
+ mutex_unlock(&mvdev->mres.lock);
+
+ if (err)
+ goto out_err;
+
+ refcount_set(&mr->refcount, 1);
+
+ return mr;
+
+out_err:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev,
+ struct vhost_iotlb *iotlb,
+ unsigned int asid)
+{
+ int err;
+
+ if (mvdev->mres.group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
+ return 0;
+
+ spin_lock(&mvdev->cvq.iommu_lock);
+
+ prune_iotlb(mvdev->cvq.iotlb);
+ err = dup_iotlb(mvdev->cvq.iotlb, iotlb);
+
+ spin_unlock(&mvdev->cvq.iommu_lock);
+
return err;
}
-int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
- bool *change_map, unsigned int asid)
+int mlx5_vdpa_create_dma_mr(struct mlx5_vdpa_dev *mvdev)
{
- struct mlx5_vdpa_mr *mr = &mvdev->mr;
- int err = 0;
+ struct mlx5_vdpa_mr *mr;
+
+ mr = mlx5_vdpa_create_mr(mvdev, NULL);
+ if (IS_ERR(mr))
+ return PTR_ERR(mr);
+
+ mlx5_vdpa_update_mr(mvdev, mr, 0);
- *change_map = false;
- mutex_lock(&mr->mkey_mtx);
- if (mr->initialized) {
- mlx5_vdpa_info(mvdev, "memory map update\n");
- *change_map = true;
+ return mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, 0);
+}
+
+int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
+{
+ if (asid >= MLX5_VDPA_NUM_AS)
+ return -EINVAL;
+
+ mlx5_vdpa_update_mr(mvdev, NULL, asid);
+
+ if (asid == 0 && MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
+ if (mlx5_vdpa_create_dma_mr(mvdev))
+ mlx5_vdpa_warn(mvdev, "create DMA MR failed\n");
+ } else {
+ mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, asid);
}
- if (!*change_map)
- err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid);
- mutex_unlock(&mr->mkey_mtx);
- return err;
+ return 0;
+}
+
+int mlx5_vdpa_init_mr_resources(struct mlx5_vdpa_dev *mvdev)
+{
+ struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;
+
+ mres->wq_gc = create_singlethread_workqueue("mlx5_vdpa_mr_gc");
+ if (!mres->wq_gc)
+ return -ENOMEM;
+
+ INIT_DELAYED_WORK(&mres->gc_dwork_ent, mlx5_vdpa_mr_gc_handler);
+
+ mutex_init(&mres->lock);
+
+ INIT_LIST_HEAD(&mres->mr_list_head);
+ INIT_LIST_HEAD(&mres->mr_gc_list_head);
+
+ return 0;
+}
+
+void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev)
+{
+ struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;
+
+ if (!mres->wq_gc)
+ return;
+
+ atomic_set(&mres->shutdown, 1);
+
+ flush_delayed_work(&mres->gc_dwork_ent);
+ destroy_workqueue(mres->wq_gc);
+ mres->wq_gc = NULL;
+ mutex_destroy(&mres->lock);
}
diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c
index 9800f9bec225..aeae31d0cefa 100644
--- a/drivers/vdpa/mlx5/core/resources.c
+++ b/drivers/vdpa/mlx5/core/resources.c
@@ -213,7 +213,7 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in,
return err;
mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
- *mkey |= mlx5_idx_to_mkey(mkey_index);
+ *mkey = mlx5_idx_to_mkey(mkey_index);
return 0;
}
@@ -233,6 +233,7 @@ static int init_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
if (!mvdev->cvq.iotlb)
return -ENOMEM;
+ spin_lock_init(&mvdev->cvq.iommu_lock);
vringh_set_iotlb(&mvdev->cvq.vring, mvdev->cvq.iotlb, &mvdev->cvq.iommu_lock);
return 0;
@@ -255,7 +256,6 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
mlx5_vdpa_warn(mvdev, "resources already allocated\n");
return -EINVAL;
}
- mutex_init(&mvdev->mr.mkey_mtx);
res->uar = mlx5_get_uars_page(mdev);
if (IS_ERR(res->uar)) {
err = PTR_ERR(res->uar);
@@ -300,7 +300,6 @@ err_pd:
err_uctx:
mlx5_put_uars_page(mdev, res->uar);
err_uars:
- mutex_destroy(&mvdev->mr.mkey_mtx);
return err;
}
@@ -317,6 +316,78 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev)
dealloc_pd(mvdev, res->pdn, res->uid);
destroy_uctx(mvdev, res->uid);
mlx5_put_uars_page(mvdev->mdev, res->uar);
- mutex_destroy(&mvdev->mr.mkey_mtx);
res->valid = false;
}
+
+static void virtqueue_cmd_callback(int status, struct mlx5_async_work *context)
+{
+ struct mlx5_vdpa_async_cmd *cmd =
+ container_of(context, struct mlx5_vdpa_async_cmd, cb_work);
+
+ cmd->err = mlx5_cmd_check(context->ctx->dev, status, cmd->in, cmd->out);
+ complete(&cmd->cmd_done);
+}
+
+static int issue_async_cmd(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_async_cmd *cmds,
+ int issued,
+ int *completed)
+
+{
+ struct mlx5_vdpa_async_cmd *cmd = &cmds[issued];
+ int err;
+
+retry:
+ err = mlx5_cmd_exec_cb(&mvdev->async_ctx,
+ cmd->in, cmd->inlen,
+ cmd->out, cmd->outlen,
+ virtqueue_cmd_callback,
+ &cmd->cb_work);
+ if (err == -EBUSY) {
+ if (*completed < issued) {
+ /* Throttled by own commands: wait for oldest completion. */
+ wait_for_completion(&cmds[*completed].cmd_done);
+ (*completed)++;
+
+ goto retry;
+ } else {
+ /* Throttled by external commands: switch to sync api. */
+ err = mlx5_cmd_exec(mvdev->mdev,
+ cmd->in, cmd->inlen,
+ cmd->out, cmd->outlen);
+ if (!err)
+ (*completed)++;
+ }
+ }
+
+ return err;
+}
+
+int mlx5_vdpa_exec_async_cmds(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_async_cmd *cmds,
+ int num_cmds)
+{
+ int completed = 0;
+ int issued = 0;
+ int err = 0;
+
+ for (int i = 0; i < num_cmds; i++)
+ init_completion(&cmds[i].cmd_done);
+
+ while (issued < num_cmds) {
+
+ err = issue_async_cmd(mvdev, cmds, issued, &completed);
+ if (err) {
+ mlx5_vdpa_err(mvdev, "error issuing command %d of %d: %d\n",
+ issued, num_cmds, err);
+ break;
+ }
+
+ issued++;
+ }
+
+ while (completed < issued)
+ wait_for_completion(&cmds[completed++].cmd_done);
+
+ return err;
+}
diff --git a/drivers/vdpa/mlx5/net/debug.c b/drivers/vdpa/mlx5/net/debug.c
new file mode 100644
index 000000000000..9c85162c19fc
--- /dev/null
+++ b/drivers/vdpa/mlx5/net/debug.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/debugfs.h>
+#include <linux/mlx5/fs.h>
+#include "mlx5_vnet.h"
+
+static int tirn_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_vdpa_net *ndev = file->private;
+
+ seq_printf(file, "0x%x\n", ndev->res.tirn);
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(tirn);
+
+void mlx5_vdpa_remove_tirn(struct mlx5_vdpa_net *ndev)
+{
+ if (ndev->debugfs)
+ debugfs_remove(ndev->res.tirn_dent);
+}
+
+void mlx5_vdpa_add_tirn(struct mlx5_vdpa_net *ndev)
+{
+ ndev->res.tirn_dent = debugfs_create_file("tirn", 0444, ndev->rx_dent,
+ ndev, &tirn_fops);
+}
+
+static int rx_flow_table_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_vdpa_net *ndev = file->private;
+
+ seq_printf(file, "0x%x\n", mlx5_flow_table_id(ndev->rxft));
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(rx_flow_table);
+
+void mlx5_vdpa_remove_rx_flow_table(struct mlx5_vdpa_net *ndev)
+{
+ if (ndev->debugfs)
+ debugfs_remove(ndev->rx_table_dent);
+}
+
+void mlx5_vdpa_add_rx_flow_table(struct mlx5_vdpa_net *ndev)
+{
+ ndev->rx_table_dent = debugfs_create_file("table_id", 0444, ndev->rx_dent,
+ ndev, &rx_flow_table_fops);
+}
+
+#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
+static int packets_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_vdpa_counter *counter = file->private;
+ u64 packets;
+ u64 bytes;
+ int err;
+
+ err = mlx5_fc_query(counter->mdev, counter->counter, &packets, &bytes);
+ if (err)
+ return err;
+
+ seq_printf(file, "0x%llx\n", packets);
+ return 0;
+}
+
+static int bytes_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_vdpa_counter *counter = file->private;
+ u64 packets;
+ u64 bytes;
+ int err;
+
+ err = mlx5_fc_query(counter->mdev, counter->counter, &packets, &bytes);
+ if (err)
+ return err;
+
+ seq_printf(file, "0x%llx\n", bytes);
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(packets);
+DEFINE_SHOW_ATTRIBUTE(bytes);
+
+static void add_counter_node(struct mlx5_vdpa_counter *counter,
+ struct dentry *parent)
+{
+ debugfs_create_file("packets", 0444, parent, counter,
+ &packets_fops);
+ debugfs_create_file("bytes", 0444, parent, counter,
+ &bytes_fops);
+}
+
+void mlx5_vdpa_add_rx_counters(struct mlx5_vdpa_net *ndev,
+ struct macvlan_node *node)
+{
+ static const char *ut = "untagged";
+ char vidstr[9];
+ u16 vid;
+
+ node->ucast_counter.mdev = ndev->mvdev.mdev;
+ node->mcast_counter.mdev = ndev->mvdev.mdev;
+ if (node->tagged) {
+ vid = key2vid(node->macvlan);
+ snprintf(vidstr, sizeof(vidstr), "0x%x", vid);
+ } else {
+ strcpy(vidstr, ut);
+ }
+
+ node->dent = debugfs_create_dir(vidstr, ndev->rx_dent);
+ if (IS_ERR(node->dent)) {
+ node->dent = NULL;
+ return;
+ }
+
+ node->ucast_counter.dent = debugfs_create_dir("ucast", node->dent);
+ if (IS_ERR(node->ucast_counter.dent))
+ return;
+
+ add_counter_node(&node->ucast_counter, node->ucast_counter.dent);
+
+ node->mcast_counter.dent = debugfs_create_dir("mcast", node->dent);
+ if (IS_ERR(node->mcast_counter.dent))
+ return;
+
+ add_counter_node(&node->mcast_counter, node->mcast_counter.dent);
+}
+
+void mlx5_vdpa_remove_rx_counters(struct mlx5_vdpa_net *ndev,
+ struct macvlan_node *node)
+{
+ if (node->dent && ndev->debugfs)
+ debugfs_remove_recursive(node->dent);
+}
+#endif
+
+void mlx5_vdpa_add_debugfs(struct mlx5_vdpa_net *ndev)
+{
+ struct mlx5_core_dev *mdev;
+
+ mdev = ndev->mvdev.mdev;
+ ndev->debugfs = debugfs_create_dir(dev_name(&ndev->mvdev.vdev.dev),
+ mlx5_debugfs_get_dev_root(mdev));
+ if (!IS_ERR(ndev->debugfs))
+ ndev->rx_dent = debugfs_create_dir("rx", ndev->debugfs);
+}
+
+void mlx5_vdpa_remove_debugfs(struct mlx5_vdpa_net *ndev)
+{
+ debugfs_remove_recursive(ndev->debugfs);
+ ndev->debugfs = NULL;
+}
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 3a6dbbc6440d..ddaa1366704b 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -7,6 +7,7 @@
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/vdpa.h>
+#include <uapi/linux/vhost_types.h>
#include <linux/virtio_config.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/cq.h>
@@ -18,15 +19,12 @@
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/mpfs.h>
#include "mlx5_vdpa.h"
+#include "mlx5_vnet.h"
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox VDPA driver");
MODULE_LICENSE("Dual BSD/GPL");
-#define to_mlx5_vdpa_ndev(__mvdev) \
- container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
-#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)
-
#define VALID_FEATURES_MASK \
(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \
BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \
@@ -50,13 +48,17 @@ MODULE_LICENSE("Dual BSD/GPL");
#define MLX5V_UNTAGGED 0x1000
-struct mlx5_vdpa_net_resources {
- u32 tisn;
- u32 tdn;
- u32 tirn;
- u32 rqtn;
- bool valid;
-};
+/* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
+ * 5.1.6.5.5 "Device operation in multiqueue mode":
+ *
+ * Multiqueue is disabled by default.
+ * The driver enables multiqueue by sending a command using class
+ * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
+ * operation, as follows: ...
+ */
+#define MLX5V_DEFAULT_VQ_COUNT 2
+
+#define MLX5V_DEFAULT_VQ_SIZE 256
struct mlx5_vdpa_cq_buf {
struct mlx5_frag_buf_ctrl fbc;
@@ -94,6 +96,7 @@ struct mlx5_vq_restore_info {
u64 driver_addr;
u16 avail_index;
u16 used_index;
+ struct msi_map map;
bool ready;
bool restore;
};
@@ -130,6 +133,13 @@ struct mlx5_vdpa_virtqueue {
u16 used_idx;
int fw_state;
+ u64 modified_fields;
+
+ struct mlx5_vdpa_mr *vq_mr;
+ struct mlx5_vdpa_mr *desc_mr;
+
+ struct msi_map map;
+
/* keep last in the struct */
struct mlx5_vq_restore_info ri;
};
@@ -146,47 +156,14 @@ static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
return idx <= mvdev->max_idx;
}
-#define MLX5V_MACVLAN_SIZE 256
-
-struct mlx5_vdpa_net {
- struct mlx5_vdpa_dev mvdev;
- struct mlx5_vdpa_net_resources res;
- struct virtio_net_config config;
- struct mlx5_vdpa_virtqueue *vqs;
- struct vdpa_callback *event_cbs;
-
- /* Serialize vq resources creation and destruction. This is required
- * since memory map might change and we need to destroy and create
- * resources while driver in operational.
- */
- struct rw_semaphore reslock;
- struct mlx5_flow_table *rxft;
- bool setup;
- u32 cur_num_vqs;
- u32 rqt_size;
- bool nb_registered;
- struct notifier_block nb;
- struct vdpa_callback config_cb;
- struct mlx5_vdpa_wq_ent cvq_ent;
- struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE];
-};
-
-struct macvlan_node {
- struct hlist_node hlist;
- struct mlx5_flow_handle *ucast_rule;
- struct mlx5_flow_handle *mcast_rule;
- u64 macvlan;
-};
-
-static void free_resources(struct mlx5_vdpa_net *ndev);
-static void init_mvqs(struct mlx5_vdpa_net *ndev);
-static int setup_driver(struct mlx5_vdpa_dev *mvdev);
-static void teardown_driver(struct mlx5_vdpa_net *ndev);
+static void free_fixed_resources(struct mlx5_vdpa_net *ndev);
+static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev);
+static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled);
+static void teardown_vq_resources(struct mlx5_vdpa_net *ndev);
+static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq);
static bool mlx5_vdpa_debug;
-#define MLX5_CVQ_MAX_ENT 16
-
#define MLX5_LOG_VIO_FLAG(_feature) \
do { \
if (features & BIT_ULL(_feature)) \
@@ -596,6 +573,8 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
vcq->mcq.set_ci_db = vcq->db.db;
vcq->mcq.arm_db = vcq->db.db + 1;
vcq->mcq.cqe_sz = 64;
+ vcq->mcq.comp = mlx5_vdpa_cq_comp;
+ vcq->cqe = num_ent;
err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
if (err)
@@ -621,7 +600,7 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
/* Use vector 0 by default. Consider adding code to choose least used
* vector.
*/
- err = mlx5_vector2eqn(mdev, 0, &eqn);
+ err = mlx5_comp_eqn_get(mdev, 0, &eqn);
if (err)
goto err_vec;
@@ -635,10 +614,6 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
if (err)
goto err_vec;
- vcq->mcq.comp = mlx5_vdpa_cq_comp;
- vcq->cqe = num_ent;
- vcq->mcq.set_ci_db = vcq->db.db;
- vcq->mcq.arm_db = vcq->db.db + 1;
mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
kfree(in);
return 0;
@@ -666,30 +641,70 @@ static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
}
+static int read_umem_params(struct mlx5_vdpa_net *ndev)
+{
+ u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
+ u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
+ struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
+ int out_size;
+ void *caps;
+ void *out;
+ int err;
+
+ out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ out = kzalloc(out_size, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+ MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
+ err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
+ if (err) {
+ mlx5_vdpa_warn(&ndev->mvdev,
+ "Failed reading vdpa umem capabilities with err %d\n", err);
+ goto out;
+ }
+
+ caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
+
+ ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
+ ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);
+
+ ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
+ ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);
+
+ ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
+ ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);
+
+out:
+ kfree(out);
+ return 0;
+}
+
static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
struct mlx5_vdpa_umem **umemp)
{
- struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
- int p_a;
- int p_b;
+ u32 p_a;
+ u32 p_b;
switch (num) {
case 1:
- p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
- p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
+ p_a = ndev->umem_1_buffer_param_a;
+ p_b = ndev->umem_1_buffer_param_b;
*umemp = &mvq->umem1;
break;
case 2:
- p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
- p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
+ p_a = ndev->umem_2_buffer_param_a;
+ p_b = ndev->umem_2_buffer_param_b;
*umemp = &mvq->umem2;
break;
case 3:
- p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
- p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
+ p_a = ndev->umem_3_buffer_param_a;
+ p_b = ndev->umem_3_buffer_param_b;
*umemp = &mvq->umem3;
break;
}
+
(*umemp)->size = p_a * mvq->num_ent + p_b;
}
@@ -821,12 +836,28 @@ static bool vq_is_tx(u16 idx)
return idx % 2;
}
-static u16 get_features_12_3(u64 features)
+enum {
+ MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
+ MLX5_VIRTIO_NET_F_HOST_ECN = 4,
+ MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
+ MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
+ MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
+ MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
+ MLX5_VIRTIO_NET_F_CSUM = 10,
+ MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
+ MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
+};
+
+static u16 get_features(u64 features)
{
- return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
- (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
- (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
- (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
+ return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
}
static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
@@ -835,11 +866,25 @@ static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
}
-static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
+static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
+{
+ return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
+ (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
+ pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
+}
+
+static int create_virtqueue(struct mlx5_vdpa_net *ndev,
+ struct mlx5_vdpa_virtqueue *mvq,
+ bool filled)
{
int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
+ struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
+ struct mlx5_vdpa_mr *vq_mr;
+ struct mlx5_vdpa_mr *vq_desc_mr;
+ u64 features = filled ? mvdev->actual_features : mvdev->mlx_features;
void *obj_context;
+ u16 mlx_features;
void *cmd_hdr;
void *vq_ctx;
void *in;
@@ -855,6 +900,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
goto err_alloc;
}
+ mlx_features = get_features(features);
cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
@@ -862,26 +908,58 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
- MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
- MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
- get_features_12_3(ndev->mvdev.actual_features));
+ mlx_features >> 3);
+ MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
+ mlx_features & 7);
vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
if (vq_is_tx(mvq->index))
MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
- MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
+ if (mvq->map.virq) {
+ MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
+ MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
+ } else {
+ MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
+ MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
+ }
+
MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
- MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
- !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
- MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
- MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
- MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
- MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey);
+ !!(features & BIT_ULL(VIRTIO_F_VERSION_1)));
+
+ if (filled) {
+ MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
+ MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
+
+ MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
+ MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
+ MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
+
+ vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
+ if (vq_mr)
+ MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
+
+ vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
+ if (vq_desc_mr &&
+ MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
+ MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey);
+ } else {
+ /* If there is no mr update, make sure that the existing ones are set
+ * modify to ready.
+ */
+ vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
+ if (vq_mr)
+ mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
+
+ vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
+ if (vq_desc_mr)
+ mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
+ }
+
MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
@@ -900,6 +978,17 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
kfree(in);
mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ if (filled) {
+ mlx5_vdpa_get_mr(mvdev, vq_mr);
+ mvq->vq_mr = vq_mr;
+
+ if (vq_desc_mr &&
+ MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) {
+ mlx5_vdpa_get_mr(mvdev, vq_desc_mr);
+ mvq->desc_mr = vq_desc_mr;
+ }
+ }
+
return 0;
err_cmd:
@@ -926,6 +1015,12 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
}
mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
umems_destroy(ndev, mvq);
+
+ mlx5_vdpa_put_mr(&ndev->mvdev, mvq->vq_mr);
+ mvq->vq_mr = NULL;
+
+ mlx5_vdpa_put_mr(&ndev->mvdev, mvq->desc_mr);
+ mvq->desc_mr = NULL;
}
static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
@@ -1087,44 +1182,101 @@ struct mlx5_virtq_attr {
u16 used_index;
};
-static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
- struct mlx5_virtq_attr *attr)
-{
- int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
- u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
- void *out;
- void *obj_context;
- void *cmd_hdr;
- int err;
+struct mlx5_virtqueue_query_mem {
+ u8 in[MLX5_ST_SZ_BYTES(query_virtio_net_q_in)];
+ u8 out[MLX5_ST_SZ_BYTES(query_virtio_net_q_out)];
+};
- out = kzalloc(outlen, GFP_KERNEL);
- if (!out)
- return -ENOMEM;
+struct mlx5_virtqueue_modify_mem {
+ u8 in[MLX5_ST_SZ_BYTES(modify_virtio_net_q_in)];
+ u8 out[MLX5_ST_SZ_BYTES(modify_virtio_net_q_out)];
+};
- cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
+static void fill_query_virtqueue_cmd(struct mlx5_vdpa_net *ndev,
+ struct mlx5_vdpa_virtqueue *mvq,
+ struct mlx5_virtqueue_query_mem *cmd)
+{
+ void *cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
- err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
- if (err)
- goto err_cmd;
+}
+
+static void query_virtqueue_end(struct mlx5_vdpa_net *ndev,
+ struct mlx5_virtqueue_query_mem *cmd,
+ struct mlx5_virtq_attr *attr)
+{
+ void *obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, cmd->out, obj_context);
- obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
memset(attr, 0, sizeof(*attr));
attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
- kfree(out);
- return 0;
+}
-err_cmd:
- kfree(out);
+static int query_virtqueues(struct mlx5_vdpa_net *ndev,
+ int start_vq,
+ int num_vqs,
+ struct mlx5_virtq_attr *attrs)
+{
+ struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
+ struct mlx5_virtqueue_query_mem *cmd_mem;
+ struct mlx5_vdpa_async_cmd *cmds;
+ int err = 0;
+
+ WARN(start_vq + num_vqs > mvdev->max_vqs, "query vq range invalid [%d, %d), max_vqs: %u\n",
+ start_vq, start_vq + num_vqs, mvdev->max_vqs);
+
+ cmds = kvcalloc(num_vqs, sizeof(*cmds), GFP_KERNEL);
+ cmd_mem = kvcalloc(num_vqs, sizeof(*cmd_mem), GFP_KERNEL);
+ if (!cmds || !cmd_mem) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ for (int i = 0; i < num_vqs; i++) {
+ cmds[i].in = &cmd_mem[i].in;
+ cmds[i].inlen = sizeof(cmd_mem[i].in);
+ cmds[i].out = &cmd_mem[i].out;
+ cmds[i].outlen = sizeof(cmd_mem[i].out);
+ fill_query_virtqueue_cmd(ndev, &ndev->vqs[start_vq + i], &cmd_mem[i]);
+ }
+
+ err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs);
+ if (err) {
+ mlx5_vdpa_err(mvdev, "error issuing query cmd for vq range [%d, %d): %d\n",
+ start_vq, start_vq + num_vqs, err);
+ goto done;
+ }
+
+ for (int i = 0; i < num_vqs; i++) {
+ struct mlx5_vdpa_async_cmd *cmd = &cmds[i];
+ int vq_idx = start_vq + i;
+
+ if (cmd->err) {
+ mlx5_vdpa_err(mvdev, "query vq %d failed, err: %d\n", vq_idx, cmd->err);
+ if (!err)
+ err = cmd->err;
+ continue;
+ }
+
+ query_virtqueue_end(ndev, &cmd_mem[i], &attrs[i]);
+ }
+
+done:
+ kvfree(cmd_mem);
+ kvfree(cmds);
return err;
}
-static bool is_valid_state_change(int oldstate, int newstate)
+static bool is_resumable(struct mlx5_vdpa_net *ndev)
+{
+ return ndev->mvdev.vdev.config->resume;
+}
+
+static bool is_valid_state_change(int oldstate, int newstate, bool resumable)
{
switch (oldstate) {
case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
@@ -1132,48 +1284,122 @@ static bool is_valid_state_change(int oldstate, int newstate)
case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
+ return resumable ? newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY : false;
case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
default:
return false;
}
}
-static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
+static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq)
{
- int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
- u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
- void *obj_context;
- void *cmd_hdr;
- void *in;
- int err;
+ /* Only state is always modifiable */
+ if (mvq->modified_fields & ~MLX5_VIRTQ_MODIFY_MASK_STATE)
+ return mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT ||
+ mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
- if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
- return 0;
-
- if (!is_valid_state_change(mvq->fw_state, state))
- return -EINVAL;
+ return true;
+}
- in = kzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
+static void fill_modify_virtqueue_cmd(struct mlx5_vdpa_net *ndev,
+ struct mlx5_vdpa_virtqueue *mvq,
+ int state,
+ struct mlx5_virtqueue_modify_mem *cmd)
+{
+ struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
+ struct mlx5_vdpa_mr *desc_mr = NULL;
+ struct mlx5_vdpa_mr *vq_mr = NULL;
+ void *obj_context;
+ void *cmd_hdr;
+ void *vq_ctx;
- cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
+ cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
- obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
- MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
- MLX5_VIRTQ_MODIFY_MASK_STATE);
- MLX5_SET(virtio_net_q_object, obj_context, state, state);
- err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
- kfree(in);
- if (!err)
+ obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, obj_context);
+ vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
+
+ if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE)
+ MLX5_SET(virtio_net_q_object, obj_context, state, state);
+
+ if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) {
+ MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
+ MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
+ MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
+ }
+
+ if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX)
+ MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
+
+ if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX)
+ MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
+
+ if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION)
+ MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
+ !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
+
+ if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES) {
+ u16 mlx_features = get_features(ndev->mvdev.actual_features);
+
+ MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
+ mlx_features >> 3);
+ MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
+ mlx_features & 7);
+ }
+
+ if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
+ vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
+
+ if (vq_mr)
+ MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
+ else
+ mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
+ }
+
+ if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
+ desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
+
+ if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
+ MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey);
+ else
+ mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
+ }
+
+ MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields);
+}
+
+static void modify_virtqueue_end(struct mlx5_vdpa_net *ndev,
+ struct mlx5_vdpa_virtqueue *mvq,
+ int state)
+{
+ struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
+
+ if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
+ unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP];
+ struct mlx5_vdpa_mr *vq_mr = mvdev->mres.mr[asid];
+
+ mlx5_vdpa_put_mr(mvdev, mvq->vq_mr);
+ mlx5_vdpa_get_mr(mvdev, vq_mr);
+ mvq->vq_mr = vq_mr;
+ }
+
+ if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
+ unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP];
+ struct mlx5_vdpa_mr *desc_mr = mvdev->mres.mr[asid];
+
+ mlx5_vdpa_put_mr(mvdev, mvq->desc_mr);
+ mlx5_vdpa_get_mr(mvdev, desc_mr);
+ mvq->desc_mr = desc_mr;
+ }
+
+ if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE)
mvq->fw_state = state;
- return err;
+ mvq->modified_fields = 0;
}
static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
@@ -1217,13 +1443,62 @@ static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_vir
mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
}
-static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
+static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
{
- u16 idx = mvq->index;
+ struct vdpa_callback *cb = priv;
+
+ if (cb->callback)
+ return cb->callback(cb->private);
+
+ return IRQ_HANDLED;
+}
+
+static void alloc_vector(struct mlx5_vdpa_net *ndev,
+ struct mlx5_vdpa_virtqueue *mvq)
+{
+ struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
+ struct mlx5_vdpa_irq_pool_entry *ent;
int err;
+ int i;
- if (!mvq->num_ent)
- return 0;
+ for (i = 0; i < irqp->num_ent; i++) {
+ ent = &irqp->entries[i];
+ if (!ent->used) {
+ snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
+ dev_name(&ndev->mvdev.vdev.dev), mvq->index);
+ ent->dev_id = &ndev->event_cbs[mvq->index];
+ err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
+ ent->name, ent->dev_id);
+ if (err)
+ return;
+
+ ent->used = true;
+ mvq->map = ent->map;
+ return;
+ }
+ }
+}
+
+static void dealloc_vector(struct mlx5_vdpa_net *ndev,
+ struct mlx5_vdpa_virtqueue *mvq)
+{
+ struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
+ int i;
+
+ for (i = 0; i < irqp->num_ent; i++)
+ if (mvq->map.virq == irqp->entries[i].map.virq) {
+ free_irq(mvq->map.virq, irqp->entries[i].dev_id);
+ irqp->entries[i].used = false;
+ return;
+ }
+}
+
+static int setup_vq(struct mlx5_vdpa_net *ndev,
+ struct mlx5_vdpa_virtqueue *mvq,
+ bool filled)
+{
+ u16 idx = mvq->index;
+ int err;
if (mvq->initialized)
return 0;
@@ -1246,27 +1521,29 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
err = counter_set_alloc(ndev, mvq);
if (err)
- goto err_counter;
+ goto err_connect;
- err = create_virtqueue(ndev, mvq);
+ alloc_vector(ndev, mvq);
+ err = create_virtqueue(ndev, mvq, filled);
if (err)
- goto err_connect;
+ goto err_vq;
+
+ mvq->initialized = true;
if (mvq->ready) {
- err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
- if (err) {
- mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
- idx, err);
- goto err_connect;
- }
+ err = resume_vq(ndev, mvq);
+ if (err)
+ goto err_modify;
}
- mvq->initialized = true;
return 0;
-err_connect:
+err_modify:
+ destroy_virtqueue(ndev, mvq);
+err_vq:
+ dealloc_vector(ndev, mvq);
counter_set_dealloc(ndev, mvq);
-err_counter:
+err_connect:
qp_destroy(ndev, &mvq->vqqp);
err_vqqp:
qp_destroy(ndev, &mvq->fwqp);
@@ -1275,33 +1552,171 @@ err_fwqp:
return err;
}
-static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
+static int modify_virtqueues(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs, int state)
{
- struct mlx5_virtq_attr attr;
+ struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
+ struct mlx5_virtqueue_modify_mem *cmd_mem;
+ struct mlx5_vdpa_async_cmd *cmds;
+ int err = 0;
+
+ WARN(start_vq + num_vqs > mvdev->max_vqs, "modify vq range invalid [%d, %d), max_vqs: %u\n",
+ start_vq, start_vq + num_vqs, mvdev->max_vqs);
+
+ cmds = kvcalloc(num_vqs, sizeof(*cmds), GFP_KERNEL);
+ cmd_mem = kvcalloc(num_vqs, sizeof(*cmd_mem), GFP_KERNEL);
+ if (!cmds || !cmd_mem) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ for (int i = 0; i < num_vqs; i++) {
+ struct mlx5_vdpa_async_cmd *cmd = &cmds[i];
+ struct mlx5_vdpa_virtqueue *mvq;
+ int vq_idx = start_vq + i;
+ mvq = &ndev->vqs[vq_idx];
+
+ if (!modifiable_virtqueue_fields(mvq)) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ if (mvq->fw_state != state) {
+ if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE;
+ }
+
+ cmd->in = &cmd_mem[i].in;
+ cmd->inlen = sizeof(cmd_mem[i].in);
+ cmd->out = &cmd_mem[i].out;
+ cmd->outlen = sizeof(cmd_mem[i].out);
+ fill_modify_virtqueue_cmd(ndev, mvq, state, &cmd_mem[i]);
+ }
+
+ err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs);
+ if (err) {
+ mlx5_vdpa_err(mvdev, "error issuing modify cmd for vq range [%d, %d)\n",
+ start_vq, start_vq + num_vqs);
+ goto done;
+ }
+
+ for (int i = 0; i < num_vqs; i++) {
+ struct mlx5_vdpa_async_cmd *cmd = &cmds[i];
+ struct mlx5_vdpa_virtqueue *mvq;
+ int vq_idx = start_vq + i;
+
+ mvq = &ndev->vqs[vq_idx];
+
+ if (cmd->err) {
+ mlx5_vdpa_err(mvdev, "modify vq %d failed, state: %d -> %d, err: %d\n",
+ vq_idx, mvq->fw_state, state, err);
+ if (!err)
+ err = cmd->err;
+ continue;
+ }
+
+ modify_virtqueue_end(ndev, mvq, state);
+ }
+
+done:
+ kvfree(cmd_mem);
+ kvfree(cmds);
+ return err;
+}
+
+static int suspend_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs)
+{
+ struct mlx5_vdpa_virtqueue *mvq;
+ struct mlx5_virtq_attr *attrs;
+ int vq_idx, i;
+ int err;
+
+ if (start_vq >= ndev->cur_num_vqs)
+ return -EINVAL;
+
+ mvq = &ndev->vqs[start_vq];
if (!mvq->initialized)
- return;
+ return 0;
if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
- return;
+ return 0;
- if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
- mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
+ err = modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND);
+ if (err)
+ return err;
- if (query_virtqueue(ndev, mvq, &attr)) {
- mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
- return;
+ attrs = kcalloc(num_vqs, sizeof(struct mlx5_virtq_attr), GFP_KERNEL);
+ if (!attrs)
+ return -ENOMEM;
+
+ err = query_virtqueues(ndev, start_vq, num_vqs, attrs);
+ if (err)
+ goto done;
+
+ for (i = 0, vq_idx = start_vq; i < num_vqs; i++, vq_idx++) {
+ mvq = &ndev->vqs[vq_idx];
+ mvq->avail_idx = attrs[i].available_index;
+ mvq->used_idx = attrs[i].used_index;
}
- mvq->avail_idx = attr.available_index;
- mvq->used_idx = attr.used_index;
+
+done:
+ kfree(attrs);
+ return err;
}
-static void suspend_vqs(struct mlx5_vdpa_net *ndev)
+static int suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
- int i;
+ return suspend_vqs(ndev, mvq->index, 1);
+}
- for (i = 0; i < ndev->mvdev.max_vqs; i++)
- suspend_vq(ndev, &ndev->vqs[i]);
+static int resume_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs)
+{
+ struct mlx5_vdpa_virtqueue *mvq;
+ int err;
+
+ if (start_vq >= ndev->mvdev.max_vqs)
+ return -EINVAL;
+
+ mvq = &ndev->vqs[start_vq];
+ if (!mvq->initialized)
+ return 0;
+
+ if (mvq->index >= ndev->cur_num_vqs)
+ return 0;
+
+ switch (mvq->fw_state) {
+ case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
+ /* Due to a FW quirk we need to modify the VQ fields first then change state.
+ * This should be fixed soon. After that, a single command can be used.
+ */
+ err = modify_virtqueues(ndev, start_vq, num_vqs, mvq->fw_state);
+ if (err)
+ return err;
+ break;
+ case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
+ if (!is_resumable(ndev)) {
+ mlx5_vdpa_warn(&ndev->mvdev, "vq %d is not resumable\n", mvq->index);
+ return -EINVAL;
+ }
+ break;
+ case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
+ return 0;
+ default:
+ mlx5_vdpa_err(&ndev->mvdev, "resume vq %u called from bad state %d\n",
+ mvq->index, mvq->fw_state);
+ return -EINVAL;
+ }
+
+ return modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
+}
+
+static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
+{
+ return resume_vqs(ndev, mvq->index, 1);
}
static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
@@ -1310,7 +1725,9 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *
return;
suspend_vq(ndev, mvq);
+ mvq->modified_fields = 0;
destroy_virtqueue(ndev, mvq);
+ dealloc_vector(ndev, mvq);
counter_set_dealloc(ndev, mvq);
qp_destroy(ndev, &mvq->vqqp);
qp_destroy(ndev, &mvq->fwqp);
@@ -1431,36 +1848,85 @@ static int create_tir(struct mlx5_vdpa_net *ndev)
err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
kfree(in);
+ if (err)
+ return err;
+
+ mlx5_vdpa_add_tirn(ndev);
return err;
}
static void destroy_tir(struct mlx5_vdpa_net *ndev)
{
+ mlx5_vdpa_remove_tirn(ndev);
mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
}
#define MAX_STEERING_ENT 0x8000
#define MAX_STEERING_GROUPS 2
+#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
+ #define NUM_DESTS 2
+#else
+ #define NUM_DESTS 1
+#endif
+
+static int add_steering_counters(struct mlx5_vdpa_net *ndev,
+ struct macvlan_node *node,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dests)
+{
+#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
+ int err;
+
+ node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
+ if (IS_ERR(node->ucast_counter.counter))
+ return PTR_ERR(node->ucast_counter.counter);
+
+ node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
+ if (IS_ERR(node->mcast_counter.counter)) {
+ err = PTR_ERR(node->mcast_counter.counter);
+ goto err_mcast_counter;
+ }
+
+ dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ return 0;
+
+err_mcast_counter:
+ mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
+ return err;
+#else
+ return 0;
+#endif
+}
+
+static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
+ struct macvlan_node *node)
+{
+#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
+ mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
+ mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
+#endif
+}
+
static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
- u16 vid, bool tagged,
- struct mlx5_flow_handle **ucast,
- struct mlx5_flow_handle **mcast)
+ struct macvlan_node *node)
{
- struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_destination dests[NUM_DESTS] = {};
struct mlx5_flow_act flow_act = {};
- struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
void *headers_c;
void *headers_v;
u8 *dmac_c;
u8 *dmac_v;
int err;
+ u16 vid;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec)
return -ENOMEM;
+ vid = key2vid(node->macvlan);
spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
@@ -1472,44 +1938,58 @@ static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
}
- if (tagged) {
+ if (node->tagged) {
MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
}
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- dest.tir_num = ndev->res.tirn;
- rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1);
- if (IS_ERR(rule))
- return PTR_ERR(rule);
+ dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dests[0].tir_num = ndev->res.tirn;
+ err = add_steering_counters(ndev, node, &flow_act, dests);
+ if (err)
+ goto out_free;
+
+#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
+ dests[1].counter = node->ucast_counter.counter;
+#endif
+ node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
+ if (IS_ERR(node->ucast_rule)) {
+ err = PTR_ERR(node->ucast_rule);
+ goto err_ucast;
+ }
- *ucast = rule;
+#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
+ dests[1].counter = node->mcast_counter.counter;
+#endif
memset(dmac_c, 0, ETH_ALEN);
memset(dmac_v, 0, ETH_ALEN);
dmac_c[0] = 1;
dmac_v[0] = 1;
- rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1);
- kvfree(spec);
- if (IS_ERR(rule)) {
- err = PTR_ERR(rule);
+ node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
+ if (IS_ERR(node->mcast_rule)) {
+ err = PTR_ERR(node->mcast_rule);
goto err_mcast;
}
-
- *mcast = rule;
+ kvfree(spec);
+ mlx5_vdpa_add_rx_counters(ndev, node);
return 0;
err_mcast:
- mlx5_del_flow_rules(*ucast);
+ mlx5_del_flow_rules(node->ucast_rule);
+err_ucast:
+ remove_steering_counters(ndev, node);
+out_free:
+ kvfree(spec);
return err;
}
static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
- struct mlx5_flow_handle *ucast,
- struct mlx5_flow_handle *mcast)
+ struct macvlan_node *node)
{
- mlx5_del_flow_rules(ucast);
- mlx5_del_flow_rules(mcast);
+ mlx5_vdpa_remove_rx_counters(ndev, node);
+ mlx5_del_flow_rules(node->ucast_rule);
+ mlx5_del_flow_rules(node->mcast_rule);
}
static u64 search_val(u8 *mac, u16 vlan, bool tagged)
@@ -1543,14 +2023,14 @@ static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 valu
return NULL;
}
-static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) // vlan -> vid
+static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
{
struct macvlan_node *ptr;
u64 val;
u32 idx;
int err;
- val = search_val(mac, vlan, tagged);
+ val = search_val(mac, vid, tagged);
if (mac_vlan_lookup(ndev, val))
return -EEXIST;
@@ -1558,12 +2038,13 @@ static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagg
if (!ptr)
return -ENOMEM;
- err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, vlan, tagged,
- &ptr->ucast_rule, &ptr->mcast_rule);
+ ptr->tagged = tagged;
+ ptr->macvlan = val;
+ ptr->ndev = ndev;
+ err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
if (err)
goto err_add;
- ptr->macvlan = val;
idx = hash_64(val, 8);
hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
return 0;
@@ -1582,7 +2063,8 @@ static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tag
return;
hlist_del(&ptr->hlist);
- mlx5_vdpa_del_mac_vlan_rules(ndev, ptr->ucast_rule, ptr->mcast_rule);
+ mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
+ remove_steering_counters(ndev, ptr);
kfree(ptr);
}
@@ -1595,7 +2077,8 @@ static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
hlist_del(&pos->hlist);
- mlx5_vdpa_del_mac_vlan_rules(ndev, pos->ucast_rule, pos->mcast_rule);
+ mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
+ remove_steering_counters(ndev, pos);
kfree(pos);
}
}
@@ -1612,15 +2095,16 @@ static int setup_steering(struct mlx5_vdpa_net *ndev)
ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
if (!ns) {
- mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
+ mlx5_vdpa_err(&ndev->mvdev, "failed to get flow namespace\n");
return -EOPNOTSUPP;
}
ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
if (IS_ERR(ndev->rxft)) {
- mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
+ mlx5_vdpa_err(&ndev->mvdev, "failed to create flow table\n");
return PTR_ERR(ndev->rxft);
}
+ mlx5_vdpa_add_rx_flow_table(ndev);
err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
if (err)
@@ -1629,6 +2113,7 @@ static int setup_steering(struct mlx5_vdpa_net *ndev)
return 0;
err_add:
+ mlx5_vdpa_remove_rx_flow_table(ndev);
mlx5_destroy_flow_table(ndev->rxft);
return err;
}
@@ -1636,6 +2121,7 @@ err_add:
static void teardown_steering(struct mlx5_vdpa_net *ndev)
{
clear_mac_vlan_table(ndev);
+ mlx5_vdpa_remove_rx_flow_table(ndev);
mlx5_destroy_flow_table(ndev->rxft);
}
@@ -1731,37 +2217,48 @@ static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
{
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
- int cur_qps = ndev->cur_num_vqs / 2;
+ int cur_vqs = ndev->cur_num_vqs;
+ int new_vqs = newqps * 2;
int err;
int i;
- if (cur_qps > newqps) {
- err = modify_rqt(ndev, 2 * newqps);
+ if (cur_vqs > new_vqs) {
+ err = modify_rqt(ndev, new_vqs);
if (err)
return err;
- for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
- teardown_vq(ndev, &ndev->vqs[i]);
+ if (is_resumable(ndev)) {
+ suspend_vqs(ndev, new_vqs, cur_vqs - new_vqs);
+ } else {
+ for (i = new_vqs; i < cur_vqs; i++)
+ teardown_vq(ndev, &ndev->vqs[i]);
+ }
- ndev->cur_num_vqs = 2 * newqps;
+ ndev->cur_num_vqs = new_vqs;
} else {
- ndev->cur_num_vqs = 2 * newqps;
- for (i = cur_qps * 2; i < 2 * newqps; i++) {
- err = setup_vq(ndev, &ndev->vqs[i]);
+ ndev->cur_num_vqs = new_vqs;
+
+ for (i = cur_vqs; i < new_vqs; i++) {
+ err = setup_vq(ndev, &ndev->vqs[i], false);
if (err)
goto clean_added;
}
- err = modify_rqt(ndev, 2 * newqps);
+
+ err = resume_vqs(ndev, cur_vqs, new_vqs - cur_vqs);
+ if (err)
+ goto clean_added;
+
+ err = modify_rqt(ndev, new_vqs);
if (err)
goto clean_added;
}
return 0;
clean_added:
- for (--i; i >= 2 * cur_qps; --i)
+ for (--i; i >= cur_vqs; --i)
teardown_vq(ndev, &ndev->vqs[i]);
- ndev->cur_num_vqs = 2 * cur_qps;
+ ndev->cur_num_vqs = cur_vqs;
return err;
}
@@ -1971,6 +2468,7 @@ static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_
mvq->desc_addr = desc_area;
mvq->device_addr = device_area;
mvq->driver_addr = driver_area;
+ mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS;
return 0;
}
@@ -1980,10 +2478,18 @@ static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
struct mlx5_vdpa_virtqueue *mvq;
- if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
+ if (!is_index_valid(mvdev, idx))
return;
+ if (is_ctrl_vq_idx(mvdev, idx)) {
+ struct mlx5_control_vq *cvq = &mvdev->cvq;
+
+ cvq->vring.vring.num = num;
+ return;
+ }
+
mvq = &ndev->vqs[idx];
+ ndev->needs_teardown |= num != mvq->num_ent;
mvq->num_ent = num;
}
@@ -2023,7 +2529,6 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
struct mlx5_vdpa_virtqueue *mvq;
- int err;
if (!mvdev->actual_features)
return;
@@ -2039,15 +2544,11 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
mvq = &ndev->vqs[idx];
if (!ready) {
suspend_vq(ndev, mvq);
- } else {
- err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
- if (err) {
- mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
+ } else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
+ if (resume_vq(ndev, mvq))
ready = false;
- }
}
-
mvq->ready = ready;
}
@@ -2088,6 +2589,8 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
mvq->used_idx = state->split.avail_index;
mvq->avail_idx = state->split.avail_index;
+ mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX |
+ MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX;
return 0;
}
@@ -2121,9 +2624,9 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa
return 0;
}
- err = query_virtqueue(ndev, mvq, &attr);
+ err = query_virtqueues(ndev, mvq->index, 1, &attr);
if (err) {
- mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
+ mlx5_vdpa_err(mvdev, "failed to query virtqueue\n");
return err;
}
state->split.avail_index = attr.used_index;
@@ -2145,23 +2648,37 @@ static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
return MLX5_VDPA_DATAVQ_GROUP;
}
-enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
- MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
- MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
- MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
-};
+static u32 mlx5_vdpa_get_vq_desc_group(struct vdpa_device *vdev, u16 idx)
+{
+ struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+
+ if (is_ctrl_vq_idx(mvdev, idx))
+ return MLX5_VDPA_CVQ_GROUP;
+
+ return MLX5_VDPA_DATAVQ_DESC_GROUP;
+}
static u64 mlx_to_vritio_features(u16 dev_features)
{
u64 result = 0;
- if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
+ result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
+ result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
+ result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
+ result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
+ result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
- if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
result |= BIT_ULL(VIRTIO_NET_F_CSUM);
- if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
- if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
return result;
@@ -2183,6 +2700,7 @@ static u64 get_supported_features(struct mlx5_core_dev *mdev)
mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
+ mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
return mlx_vdpa_features;
}
@@ -2218,14 +2736,14 @@ static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
return 0;
}
-static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
+static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev, bool filled)
{
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
int err;
int i;
for (i = 0; i < mvdev->max_vqs; i++) {
- err = setup_vq(ndev, &ndev->vqs[i]);
+ err = setup_vq(ndev, &ndev->vqs[i], filled);
if (err)
goto err_vq;
}
@@ -2241,16 +2759,10 @@ err_vq:
static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
{
- struct mlx5_vdpa_virtqueue *mvq;
int i;
- for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
- mvq = &ndev->vqs[i];
- if (!mvq->initialized)
- continue;
-
- teardown_vq(ndev, mvq);
- }
+ for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--)
+ teardown_vq(ndev, &ndev->vqs[i]);
}
static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
@@ -2271,10 +2783,127 @@ static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
}
}
+static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
+{
+ u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
+ int err;
+
+ MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
+ MLX5_SET(query_vport_state_in, in, op_mod, opmod);
+ MLX5_SET(query_vport_state_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_vport_state_in, in, other_vport, 1);
+
+ err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
+ if (err)
+ return 0;
+
+ return MLX5_GET(query_vport_state_out, out, state);
+}
+
+static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
+{
+ if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
+ VPORT_STATE_UP)
+ return true;
+
+ return false;
+}
+
+static void update_carrier(struct work_struct *work)
+{
+ struct mlx5_vdpa_wq_ent *wqent;
+ struct mlx5_vdpa_dev *mvdev;
+ struct mlx5_vdpa_net *ndev;
+
+ wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
+ mvdev = wqent->mvdev;
+ ndev = to_mlx5_vdpa_ndev(mvdev);
+ if (get_link_state(mvdev))
+ ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
+ else
+ ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
+
+ if (ndev->config_cb.callback)
+ ndev->config_cb.callback(ndev->config_cb.private);
+
+ kfree(wqent);
+}
+
+static int queue_link_work(struct mlx5_vdpa_net *ndev)
+{
+ struct mlx5_vdpa_wq_ent *wqent;
+
+ wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
+ if (!wqent)
+ return -ENOMEM;
+
+ wqent->mvdev = &ndev->mvdev;
+ INIT_WORK(&wqent->work, update_carrier);
+ queue_work(ndev->mvdev.wq, &wqent->work);
+ return 0;
+}
+
+static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
+{
+ struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
+ struct mlx5_eqe *eqe = param;
+ int ret = NOTIFY_DONE;
+
+ if (ndev->mvdev.suspended)
+ return NOTIFY_DONE;
+
+ if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
+ switch (eqe->sub_type) {
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+ if (queue_link_work(ndev))
+ return NOTIFY_DONE;
+
+ ret = NOTIFY_OK;
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+ return ret;
+ }
+ return ret;
+}
+
+static void register_link_notifier(struct mlx5_vdpa_net *ndev)
+{
+ if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
+ return;
+
+ ndev->nb.notifier_call = event_handler;
+ mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
+ ndev->nb_registered = true;
+ queue_link_work(ndev);
+}
+
+static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
+{
+ if (!ndev->nb_registered)
+ return;
+
+ ndev->nb_registered = false;
+ mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
+ if (ndev->mvdev.wq)
+ flush_workqueue(ndev->mvdev.wq);
+}
+
+static u64 mlx5_vdpa_get_backend_features(const struct vdpa_device *vdpa)
+{
+ return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK);
+}
+
static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+ u64 old_features = mvdev->actual_features;
+ u64 diff_features;
int err;
print_features(mvdev, features, true);
@@ -2284,12 +2913,26 @@ static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
return err;
ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
- if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
- ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
- else
- ndev->rqt_size = 1;
- ndev->cur_num_vqs = 2 * ndev->rqt_size;
+ /* Interested in changes of vq features only. */
+ if (get_features(old_features) != get_features(mvdev->actual_features)) {
+ for (int i = 0; i < mvdev->max_vqs; ++i) {
+ struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[i];
+
+ mvq->modified_fields |= (
+ MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION |
+ MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES
+ );
+ }
+ }
+
+ /* When below features diverge from initial device features, VQs need a full teardown. */
+#define NEEDS_TEARDOWN_MASK (BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | \
+ BIT_ULL(VIRTIO_NET_F_CSUM) | \
+ BIT_ULL(VIRTIO_F_VERSION_1))
+
+ diff_features = mvdev->mlx_features ^ mvdev->actual_features;
+ ndev->needs_teardown = !!(diff_features & NEEDS_TEARDOWN_MASK);
update_cvq_info(mvdev);
return err;
@@ -2335,7 +2978,7 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu
int err;
if (mvq->initialized) {
- err = query_virtqueue(ndev, mvq, &attr);
+ err = query_virtqueues(ndev, mvq->index, 1, &attr);
if (err)
return err;
}
@@ -2347,6 +2990,7 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu
ri->desc_addr = mvq->desc_addr;
ri->device_addr = mvq->device_addr;
ri->driver_addr = mvq->driver_addr;
+ ri->map = mvq->map;
ri->restore = true;
return 0;
}
@@ -2377,7 +3021,7 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev)
int i;
mlx5_clear_vqs(ndev);
- init_mvqs(ndev);
+ mvqs_set_defaults(ndev);
for (i = 0; i < ndev->mvdev.max_vqs; i++) {
mvq = &ndev->vqs[i];
ri = &mvq->ri;
@@ -2391,46 +3035,52 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev)
mvq->desc_addr = ri->desc_addr;
mvq->device_addr = ri->device_addr;
mvq->driver_addr = ri->driver_addr;
+ mvq->map = ri->map;
}
}
static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
- struct vhost_iotlb *iotlb, unsigned int asid)
+ struct mlx5_vdpa_mr *new_mr,
+ unsigned int asid)
{
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+ bool teardown = !is_resumable(ndev);
int err;
- suspend_vqs(ndev);
- err = save_channels_info(ndev);
- if (err)
- goto err_mr;
+ suspend_vqs(ndev, 0, ndev->cur_num_vqs);
+ if (teardown) {
+ err = save_channels_info(ndev);
+ if (err)
+ return err;
- teardown_driver(ndev);
- mlx5_vdpa_destroy_mr(mvdev);
- err = mlx5_vdpa_create_mr(mvdev, iotlb, asid);
- if (err)
- goto err_mr;
+ teardown_vq_resources(ndev);
+ }
- if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
- goto err_mr;
+ mlx5_vdpa_update_mr(mvdev, new_mr, asid);
- restore_channels_info(ndev);
- err = setup_driver(mvdev);
- if (err)
- goto err_setup;
+ for (int i = 0; i < mvdev->max_vqs; i++)
+ ndev->vqs[i].modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY |
+ MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
- return 0;
+ if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
+ return 0;
-err_setup:
- mlx5_vdpa_destroy_mr(mvdev);
-err_mr:
- return err;
+ if (teardown) {
+ restore_channels_info(ndev);
+ err = setup_vq_resources(ndev, true);
+ if (err)
+ return err;
+ }
+
+ resume_vqs(ndev, 0, ndev->cur_num_vqs);
+
+ return 0;
}
/* reslock must be held for this function */
-static int setup_driver(struct mlx5_vdpa_dev *mvdev)
+static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled)
{
- struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+ struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
int err;
WARN_ON(!rwsem_is_locked(&ndev->reslock));
@@ -2440,10 +3090,16 @@ static int setup_driver(struct mlx5_vdpa_dev *mvdev)
err = 0;
goto out;
}
- err = setup_virtqueues(mvdev);
+ mlx5_vdpa_add_debugfs(ndev);
+
+ err = read_umem_params(ndev);
+ if (err)
+ goto err_setup;
+
+ err = setup_virtqueues(mvdev, filled);
if (err) {
mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
- goto out;
+ goto err_setup;
}
err = create_rqt(ndev);
@@ -2473,12 +3129,14 @@ err_tir:
destroy_rqt(ndev);
err_rqt:
teardown_virtqueues(ndev);
+err_setup:
+ mlx5_vdpa_remove_debugfs(ndev);
out:
return err;
}
/* reslock must be held for this function */
-static void teardown_driver(struct mlx5_vdpa_net *ndev)
+static void teardown_vq_resources(struct mlx5_vdpa_net *ndev)
{
WARN_ON(!rwsem_is_locked(&ndev->reslock));
@@ -2486,21 +3144,13 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev)
if (!ndev->setup)
return;
+ mlx5_vdpa_remove_debugfs(ndev);
teardown_steering(ndev);
destroy_tir(ndev);
destroy_rqt(ndev);
teardown_virtqueues(ndev);
ndev->setup = false;
-}
-
-static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
-{
- int i;
-
- for (i = 0; i < ndev->mvdev.max_vqs; i++)
- ndev->vqs[i].ready = false;
-
- ndev->mvdev.cvq.ready = false;
+ ndev->needs_teardown = false;
}
static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
@@ -2508,13 +3158,18 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
struct mlx5_control_vq *cvq = &mvdev->cvq;
int err = 0;
- if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
+ if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
+ u16 idx = cvq->vring.last_avail_idx;
+
err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
- MLX5_CVQ_MAX_ENT, false,
+ cvq->vring.vring.num, false,
(struct vring_desc *)(uintptr_t)cvq->desc_addr,
(struct vring_avail *)(uintptr_t)cvq->driver_addr,
(struct vring_used *)(uintptr_t)cvq->device_addr);
+ if (!err)
+ cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx;
+ }
return err;
}
@@ -2535,10 +3190,23 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
goto err_setup;
}
- err = setup_driver(mvdev);
- if (err) {
- mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
- goto err_setup;
+ register_link_notifier(ndev);
+
+ if (ndev->needs_teardown)
+ teardown_vq_resources(ndev);
+
+ if (ndev->setup) {
+ err = resume_vqs(ndev, 0, ndev->cur_num_vqs);
+ if (err) {
+ mlx5_vdpa_warn(mvdev, "failed to resume VQs\n");
+ goto err_driver;
+ }
+ } else {
+ err = setup_vq_resources(ndev, true);
+ if (err) {
+ mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
+ goto err_driver;
+ }
}
} else {
mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
@@ -2550,8 +3218,10 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
up_write(&ndev->reslock);
return;
+err_driver:
+ unregister_link_notifier(ndev);
err_setup:
- mlx5_vdpa_destroy_mr(&ndev->mvdev);
+ mlx5_vdpa_clean_mrs(&ndev->mvdev);
ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
err_clear:
up_write(&ndev->reslock);
@@ -2563,23 +3233,51 @@ static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
/* default mapping all groups are mapped to asid 0 */
for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
- mvdev->group2asid[i] = 0;
+ mvdev->mres.group2asid[i] = 0;
}
-static int mlx5_vdpa_reset(struct vdpa_device *vdev)
+static bool needs_vqs_reset(const struct mlx5_vdpa_dev *mvdev)
+{
+ struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+ struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[0];
+
+ if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)
+ return true;
+
+ if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT)
+ return true;
+
+ return mvq->modified_fields & (
+ MLX5_VIRTQ_MODIFY_MASK_STATE |
+ MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS |
+ MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX |
+ MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX
+ );
+}
+
+static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+ bool vq_reset;
print_status(mvdev, 0, true);
mlx5_vdpa_info(mvdev, "performing device reset\n");
down_write(&ndev->reslock);
- teardown_driver(ndev);
- clear_vqs_ready(ndev);
- mlx5_vdpa_destroy_mr(&ndev->mvdev);
+ unregister_link_notifier(ndev);
+ vq_reset = needs_vqs_reset(mvdev);
+ if (vq_reset) {
+ teardown_vq_resources(ndev);
+ mvqs_set_defaults(ndev);
+ }
+
+ if (flags & VDPA_RESET_F_CLEAN_MAP)
+ mlx5_vdpa_clean_mrs(&ndev->mvdev);
ndev->mvdev.status = 0;
- ndev->cur_num_vqs = 0;
+ ndev->mvdev.suspended = false;
+ ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT;
+ ndev->mvdev.cvq.ready = false;
ndev->mvdev.cvq.received_desc = 0;
ndev->mvdev.cvq.completed_desc = 0;
memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
@@ -2587,15 +3285,23 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
init_group_to_asid_map(mvdev);
++mvdev->generation;
- if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
- if (mlx5_vdpa_create_mr(mvdev, NULL, 0))
- mlx5_vdpa_warn(mvdev, "create MR failed\n");
+ if ((flags & VDPA_RESET_F_CLEAN_MAP) &&
+ MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
+ if (mlx5_vdpa_create_dma_mr(mvdev))
+ mlx5_vdpa_err(mvdev, "create MR failed\n");
}
+ if (vq_reset)
+ setup_vq_resources(ndev, false);
up_write(&ndev->reslock);
return 0;
}
+static int mlx5_vdpa_reset(struct vdpa_device *vdev)
+{
+ return mlx5_vdpa_compat_reset(vdev, 0);
+}
+
static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
{
return sizeof(struct virtio_net_config);
@@ -2627,18 +3333,38 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
unsigned int asid)
{
- bool change_map;
+ struct mlx5_vdpa_mr *new_mr;
int err;
- err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map, asid);
- if (err) {
- mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
- return err;
+ if (asid >= MLX5_VDPA_NUM_AS)
+ return -EINVAL;
+
+ if (vhost_iotlb_itree_first(iotlb, 0, U64_MAX)) {
+ new_mr = mlx5_vdpa_create_mr(mvdev, iotlb);
+ if (IS_ERR(new_mr)) {
+ err = PTR_ERR(new_mr);
+ mlx5_vdpa_err(mvdev, "create map failed(%d)\n", err);
+ return err;
+ }
+ } else {
+ /* Empty iotlbs don't have an mr but will clear the previous mr. */
+ new_mr = NULL;
}
- if (change_map)
- err = mlx5_vdpa_change_map(mvdev, iotlb, asid);
+ if (!mvdev->mres.mr[asid]) {
+ mlx5_vdpa_update_mr(mvdev, new_mr, asid);
+ } else {
+ err = mlx5_vdpa_change_map(mvdev, new_mr, asid);
+ if (err) {
+ mlx5_vdpa_err(mvdev, "change map failed(%d)\n", err);
+ goto out_err;
+ }
+ }
+
+ return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid);
+out_err:
+ mlx5_vdpa_put_mr(mvdev, new_mr);
return err;
}
@@ -2655,6 +3381,50 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
return err;
}
+static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
+{
+ struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+ struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+ int err;
+
+ down_write(&ndev->reslock);
+ err = mlx5_vdpa_reset_mr(mvdev, asid);
+ up_write(&ndev->reslock);
+ return err;
+}
+
+static union virtio_map mlx5_get_vq_map(struct vdpa_device *vdev, u16 idx)
+{
+ struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+ union virtio_map map;
+
+ if (is_ctrl_vq_idx(mvdev, idx))
+ map.dma_dev = &vdev->dev;
+ else
+ map.dma_dev = mvdev->vdev.vmap.dma_dev;
+
+ return map;
+}
+
+static void free_irqs(struct mlx5_vdpa_net *ndev)
+{
+ struct mlx5_vdpa_irq_pool_entry *ent;
+ int i;
+
+ if (!msix_mode_supported(&ndev->mvdev))
+ return;
+
+ if (!ndev->irqp.entries)
+ return;
+
+ for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
+ ent = ndev->irqp.entries + i;
+ if (ent->map.virq)
+ pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
+ }
+ kfree(ndev->irqp.entries);
+}
+
static void mlx5_vdpa_free(struct vdpa_device *vdev)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
@@ -2663,13 +3433,19 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev)
ndev = to_mlx5_vdpa_ndev(mvdev);
- free_resources(ndev);
- mlx5_vdpa_destroy_mr(mvdev);
+ /* Functions called here should be able to work with
+ * uninitialized resources.
+ */
+ free_fixed_resources(ndev);
+ mlx5_vdpa_clean_mrs(mvdev);
+ mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
if (!is_zero_ether_addr(ndev->config.mac)) {
pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
}
+ mlx5_cmd_cleanup_async_ctx(&mvdev->async_ctx);
mlx5_vdpa_free_resources(&ndev->mvdev);
+ free_irqs(ndev);
kfree(ndev->event_cbs);
kfree(ndev->vqs);
}
@@ -2698,9 +3474,23 @@ static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device
return ret;
}
-static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
+static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
{
- return -EOPNOTSUPP;
+ struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+ struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+ struct mlx5_vdpa_virtqueue *mvq;
+
+ if (!is_index_valid(mvdev, idx))
+ return -EINVAL;
+
+ if (is_ctrl_vq_idx(mvdev, idx))
+ return -EOPNOTSUPP;
+
+ mvq = &ndev->vqs[idx];
+ if (!mvq->map.virq)
+ return -EOPNOTSUPP;
+
+ return mvq->map.virq;
}
static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
@@ -2812,32 +3602,55 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
- struct mlx5_vdpa_virtqueue *mvq;
- int i;
+ int err;
+
+ mlx5_vdpa_info(mvdev, "suspending device\n");
down_write(&ndev->reslock);
- ndev->nb_registered = false;
- mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
- flush_workqueue(ndev->mvdev.wq);
- for (i = 0; i < ndev->cur_num_vqs; i++) {
- mvq = &ndev->vqs[i];
- suspend_vq(ndev, mvq);
- }
+ err = suspend_vqs(ndev, 0, ndev->cur_num_vqs);
mlx5_vdpa_cvq_suspend(mvdev);
+ mvdev->suspended = true;
up_write(&ndev->reslock);
- return 0;
+
+ return err;
+}
+
+static int mlx5_vdpa_resume(struct vdpa_device *vdev)
+{
+ struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+ struct mlx5_vdpa_net *ndev;
+ int err;
+
+ ndev = to_mlx5_vdpa_ndev(mvdev);
+
+ mlx5_vdpa_info(mvdev, "resuming device\n");
+
+ down_write(&ndev->reslock);
+ mvdev->suspended = false;
+ err = resume_vqs(ndev, 0, ndev->cur_num_vqs);
+ queue_link_work(ndev);
+ up_write(&ndev->reslock);
+
+ return err;
}
static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
unsigned int asid)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+ int err = 0;
if (group >= MLX5_VDPA_NUMVQ_GROUPS)
return -EINVAL;
- mvdev->group2asid[group] = asid;
- return 0;
+ mvdev->mres.group2asid[group] = asid;
+
+ mutex_lock(&mvdev->mres.lock);
+ if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mres.mr[asid])
+ err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mres.mr[asid]->iotlb, asid);
+ mutex_unlock(&mvdev->mres.lock);
+
+ return err;
}
static const struct vdpa_config_ops mlx5_vdpa_ops = {
@@ -2854,7 +3667,9 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
.get_vq_irq = mlx5_get_vq_irq,
.get_vq_align = mlx5_vdpa_get_vq_align,
.get_vq_group = mlx5_vdpa_get_vq_group,
+ .get_vq_desc_group = mlx5_vdpa_get_vq_desc_group, /* Op disabled if not supported. */
.get_device_features = mlx5_vdpa_get_device_features,
+ .get_backend_features = mlx5_vdpa_get_backend_features,
.set_driver_features = mlx5_vdpa_set_driver_features,
.get_driver_features = mlx5_vdpa_get_driver_features,
.set_config_cb = mlx5_vdpa_set_config_cb,
@@ -2864,14 +3679,18 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
.get_status = mlx5_vdpa_get_status,
.set_status = mlx5_vdpa_set_status,
.reset = mlx5_vdpa_reset,
+ .compat_reset = mlx5_vdpa_compat_reset,
.get_config_size = mlx5_vdpa_get_config_size,
.get_config = mlx5_vdpa_get_config,
.set_config = mlx5_vdpa_set_config,
.get_generation = mlx5_vdpa_get_generation,
.set_map = mlx5_vdpa_set_map,
+ .reset_map = mlx5_vdpa_reset_map,
.set_group_asid = mlx5_set_group_asid,
+ .get_vq_map = mlx5_get_vq_map,
.free = mlx5_vdpa_free,
.suspend = mlx5_vdpa_suspend,
+ .resume = mlx5_vdpa_resume, /* Op disabled if not supported. */
};
static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
@@ -2887,7 +3706,7 @@ static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
return 0;
}
-static int alloc_resources(struct mlx5_vdpa_net *ndev)
+static int alloc_fixed_resources(struct mlx5_vdpa_net *ndev)
{
struct mlx5_vdpa_net_resources *res = &ndev->res;
int err;
@@ -2914,7 +3733,7 @@ err_tis:
return err;
}
-static void free_resources(struct mlx5_vdpa_net *ndev)
+static void free_fixed_resources(struct mlx5_vdpa_net *ndev)
{
struct mlx5_vdpa_net_resources *res = &ndev->res;
@@ -2926,7 +3745,7 @@ static void free_resources(struct mlx5_vdpa_net *ndev)
res->valid = false;
}
-static void init_mvqs(struct mlx5_vdpa_net *ndev)
+static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev)
{
struct mlx5_vdpa_virtqueue *mvq;
int i;
@@ -2938,12 +3757,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
mvq->ndev = ndev;
mvq->fwqp.fw = true;
mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
- }
- for (; i < ndev->mvdev.max_vqs; i++) {
- mvq = &ndev->vqs[i];
- memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
- mvq->index = i;
- mvq->ndev = ndev;
+ mvq->num_ent = MLX5V_DEFAULT_VQ_SIZE;
}
}
@@ -2951,84 +3765,9 @@ struct mlx5_vdpa_mgmtdev {
struct vdpa_mgmt_dev mgtdev;
struct mlx5_adev *madev;
struct mlx5_vdpa_net *ndev;
+ struct vdpa_config_ops vdpa_ops;
};
-static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
-{
- u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
- u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
- int err;
-
- MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
- MLX5_SET(query_vport_state_in, in, op_mod, opmod);
- MLX5_SET(query_vport_state_in, in, vport_number, vport);
- if (vport)
- MLX5_SET(query_vport_state_in, in, other_vport, 1);
-
- err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
- if (err)
- return 0;
-
- return MLX5_GET(query_vport_state_out, out, state);
-}
-
-static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
-{
- if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
- VPORT_STATE_UP)
- return true;
-
- return false;
-}
-
-static void update_carrier(struct work_struct *work)
-{
- struct mlx5_vdpa_wq_ent *wqent;
- struct mlx5_vdpa_dev *mvdev;
- struct mlx5_vdpa_net *ndev;
-
- wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
- mvdev = wqent->mvdev;
- ndev = to_mlx5_vdpa_ndev(mvdev);
- if (get_link_state(mvdev))
- ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
- else
- ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
-
- if (ndev->nb_registered && ndev->config_cb.callback)
- ndev->config_cb.callback(ndev->config_cb.private);
-
- kfree(wqent);
-}
-
-static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
-{
- struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
- struct mlx5_eqe *eqe = param;
- int ret = NOTIFY_DONE;
- struct mlx5_vdpa_wq_ent *wqent;
-
- if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
- switch (eqe->sub_type) {
- case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
- case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
- wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
- if (!wqent)
- return NOTIFY_DONE;
-
- wqent->mvdev = &ndev->mvdev;
- INIT_WORK(&wqent->work, update_carrier);
- queue_work(ndev->mvdev.wq, &wqent->work);
- ret = NOTIFY_OK;
- break;
- default:
- return NOTIFY_DONE;
- }
- return ret;
- }
- return ret;
-}
-
static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
{
int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
@@ -3051,6 +3790,34 @@ static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
return err;
}
+static void allocate_irqs(struct mlx5_vdpa_net *ndev)
+{
+ struct mlx5_vdpa_irq_pool_entry *ent;
+ int i;
+
+ if (!msix_mode_supported(&ndev->mvdev))
+ return;
+
+ if (!ndev->mvdev.mdev->pdev)
+ return;
+
+ ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
+ if (!ndev->irqp.entries)
+ return;
+
+
+ for (i = 0; i < ndev->mvdev.max_vqs; i++) {
+ ent = ndev->irqp.entries + i;
+ snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
+ dev_name(&ndev->mvdev.vdev.dev), i);
+ ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
+ if (!ent->map.virq)
+ return;
+
+ ndev->irqp.num_ent++;
+ }
+}
+
static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
const struct vdpa_dev_set_config *add_config)
{
@@ -3060,6 +3827,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
struct mlx5_vdpa_dev *mvdev;
struct mlx5_vdpa_net *ndev;
struct mlx5_core_dev *mdev;
+ u64 device_features;
u32 max_vqs;
u16 mtu;
int err;
@@ -3068,6 +3836,26 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
return -ENOSPC;
mdev = mgtdev->madev->mdev;
+ device_features = mgtdev->mgtdev.supported_features;
+ if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
+ if (add_config->device_features & ~device_features) {
+ dev_warn(mdev->device,
+ "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
+ add_config->device_features, device_features);
+ return -EINVAL;
+ }
+ device_features &= add_config->device_features;
+ } else {
+ device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
+ }
+ if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
+ device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
+ dev_warn(mdev->device,
+ "Must provision minimum features 0x%llx for this device",
+ BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
+ return -EOPNOTSUPP;
+ }
+
if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
dev_warn(mdev->device, "missing support for split virtqueues\n");
@@ -3091,15 +3879,19 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
max_vqs = 2;
}
- ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
- MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
+ ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops,
+ NULL, MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
if (IS_ERR(ndev))
return PTR_ERR(ndev);
- ndev->mvdev.mlx_features = mgtdev->mgtdev.supported_features;
ndev->mvdev.max_vqs = max_vqs;
mvdev = &ndev->mvdev;
mvdev->mdev = mdev;
+ /* cpu_to_mlx5vdpa16() below depends on this flag */
+ mvdev->actual_features =
+ (device_features & BIT_ULL(VIRTIO_F_VERSION_1));
+
+ mlx5_cmd_init_async_ctx(mdev, &mvdev->async_ctx);
ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
@@ -3107,8 +3899,10 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
err = -ENOMEM;
goto err_alloc;
}
+ ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT;
- init_mvqs(ndev);
+ mvqs_set_defaults(ndev);
+ allocate_irqs(ndev);
init_rwsem(&ndev->reslock);
config = &ndev->config;
@@ -3118,20 +3912,26 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
goto err_alloc;
}
- err = query_mtu(mdev, &mtu);
- if (err)
- goto err_alloc;
+ if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
+ err = query_mtu(mdev, &mtu);
+ if (err)
+ goto err_alloc;
- ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
+ ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
+ }
- if (get_link_state(mvdev))
- ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
- else
- ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
+ if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
+ if (get_link_state(mvdev))
+ ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
+ else
+ ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
+ }
if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
- } else {
+ /* No bother setting mac address in config if not going to provision _F_MAC */
+ } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
+ device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
if (err)
goto err_alloc;
@@ -3142,56 +3942,80 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
err = mlx5_mpfs_add_mac(pfmdev, config->mac);
if (err)
goto err_alloc;
+ } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
+ /*
+ * We used to clear _F_MAC feature bit if seeing
+ * zero mac address when device features are not
+ * specifically provisioned. Keep the behaviour
+ * so old scripts do not break.
+ */
+ device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
+ } else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
+ /* Don't provision zero mac address for _F_MAC */
+ mlx5_vdpa_warn(&ndev->mvdev,
+ "No mac address provisioned?\n");
+ err = -EINVAL;
+ goto err_alloc;
+ }
- ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
+ if (device_features & BIT_ULL(VIRTIO_NET_F_MQ)) {
+ config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
+ ndev->rqt_size = max_vqs / 2;
+ } else {
+ ndev->rqt_size = 1;
}
- config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
- mvdev->vdev.dma_dev = &mdev->pdev->dev;
+ ndev->mvdev.mlx_features = device_features;
+ mvdev->vdev.vmap.dma_dev = &mdev->pdev->dev;
err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
if (err)
- goto err_mpfs;
+ goto err_alloc;
+
+ err = mlx5_vdpa_init_mr_resources(mvdev);
+ if (err)
+ goto err_alloc;
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
- err = mlx5_vdpa_create_mr(mvdev, NULL, 0);
+ err = mlx5_vdpa_create_dma_mr(mvdev);
if (err)
- goto err_res;
+ goto err_alloc;
}
- err = alloc_resources(ndev);
+ err = alloc_fixed_resources(ndev);
if (err)
- goto err_mr;
+ goto err_alloc;
ndev->cvq_ent.mvdev = mvdev;
INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
if (!mvdev->wq) {
err = -ENOMEM;
- goto err_res2;
+ goto err_alloc;
}
- ndev->nb.notifier_call = event_handler;
- mlx5_notifier_register(mdev, &ndev->nb);
- ndev->nb_registered = true;
mvdev->vdev.mdev = &mgtdev->mgtdev;
err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
if (err)
goto err_reg;
mgtdev->ndev = ndev;
+
+ /* For virtio-vdpa, the device was set up during device register. */
+ if (ndev->setup)
+ return 0;
+
+ down_write(&ndev->reslock);
+ err = setup_vq_resources(ndev, false);
+ up_write(&ndev->reslock);
+ if (err)
+ goto err_setup_vq_res;
+
return 0;
+err_setup_vq_res:
+ _vdpa_unregister_device(&mvdev->vdev);
err_reg:
destroy_workqueue(mvdev->wq);
-err_res2:
- free_resources(ndev);
-err_mr:
- mlx5_vdpa_destroy_mr(mvdev);
-err_res:
- mlx5_vdpa_free_resources(&ndev->mvdev);
-err_mpfs:
- if (!is_zero_ether_addr(config->mac))
- mlx5_mpfs_del_mac(pfmdev, config->mac);
err_alloc:
put_device(&mvdev->vdev.dev);
return err;
@@ -3204,20 +4028,50 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
struct workqueue_struct *wq;
- if (ndev->nb_registered) {
- ndev->nb_registered = false;
- mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
- }
+ unregister_link_notifier(ndev);
+ _vdpa_unregister_device(dev);
+
+ down_write(&ndev->reslock);
+ teardown_vq_resources(ndev);
+ up_write(&ndev->reslock);
+
wq = mvdev->wq;
mvdev->wq = NULL;
destroy_workqueue(wq);
- _vdpa_unregister_device(dev);
mgtdev->ndev = NULL;
}
+static int mlx5_vdpa_set_attr(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev,
+ const struct vdpa_dev_set_config *add_config)
+{
+ struct virtio_net_config *config;
+ struct mlx5_core_dev *pfmdev;
+ struct mlx5_vdpa_dev *mvdev;
+ struct mlx5_vdpa_net *ndev;
+ struct mlx5_core_dev *mdev;
+ int err = -EOPNOTSUPP;
+
+ mvdev = to_mvdev(dev);
+ ndev = to_mlx5_vdpa_ndev(mvdev);
+ mdev = mvdev->mdev;
+ config = &ndev->config;
+
+ down_write(&ndev->reslock);
+ if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
+ pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
+ err = mlx5_mpfs_add_mac(pfmdev, config->mac);
+ if (!err)
+ ether_addr_copy(config->mac, add_config->net.mac);
+ }
+
+ up_write(&ndev->reslock);
+ return err;
+}
+
static const struct vdpa_mgmtdev_ops mdev_ops = {
.dev_add = mlx5_vdpa_dev_add,
.dev_del = mlx5_vdpa_dev_del,
+ .dev_set_attr = mlx5_vdpa_set_attr,
};
static struct virtio_device_id id_table[] = {
@@ -3243,11 +4097,19 @@ static int mlx5v_probe(struct auxiliary_device *adev,
mgtdev->mgtdev.id_table = id_table;
mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
- BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU);
+ BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
+ BIT_ULL(VDPA_ATTR_DEV_FEATURES);
mgtdev->mgtdev.max_supported_vqs =
MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
mgtdev->mgtdev.supported_features = get_supported_features(mdev);
mgtdev->madev = madev;
+ mgtdev->vdpa_ops = mlx5_vdpa_ops;
+
+ if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported))
+ mgtdev->vdpa_ops.get_vq_desc_group = NULL;
+
+ if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, freeze_to_rdy_supported))
+ mgtdev->vdpa_ops.resume = NULL;
err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
if (err)
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.h b/drivers/vdpa/mlx5/net/mlx5_vnet.h
new file mode 100644
index 000000000000..00e79a7d0be8
--- /dev/null
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_VNET_H__
+#define __MLX5_VNET_H__
+
+#include "mlx5_vdpa.h"
+
+#define to_mlx5_vdpa_ndev(__mvdev) \
+ container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
+#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)
+
+struct mlx5_vdpa_net_resources {
+ u32 tisn;
+ u32 tdn;
+ u32 tirn;
+ u32 rqtn;
+ bool valid;
+ struct dentry *tirn_dent;
+};
+
+#define MLX5V_MACVLAN_SIZE 256
+
+static inline u16 key2vid(u64 key)
+{
+ return (u16)(key >> 48) & 0xfff;
+}
+
+#define MLX5_VDPA_IRQ_NAME_LEN 32
+
+struct mlx5_vdpa_irq_pool_entry {
+ struct msi_map map;
+ bool used;
+ char name[MLX5_VDPA_IRQ_NAME_LEN];
+ void *dev_id;
+};
+
+struct mlx5_vdpa_irq_pool {
+ int num_ent;
+ struct mlx5_vdpa_irq_pool_entry *entries;
+};
+
+struct mlx5_vdpa_net {
+ struct mlx5_vdpa_dev mvdev;
+ struct mlx5_vdpa_net_resources res;
+ struct virtio_net_config config;
+ struct mlx5_vdpa_virtqueue *vqs;
+ struct vdpa_callback *event_cbs;
+
+ /* Serialize vq resources creation and destruction. This is required
+ * since memory map might change and we need to destroy and create
+ * resources while driver in operational.
+ */
+ struct rw_semaphore reslock;
+ struct mlx5_flow_table *rxft;
+ struct dentry *rx_dent;
+ struct dentry *rx_table_dent;
+ bool setup;
+ bool needs_teardown;
+ u32 cur_num_vqs;
+ u32 rqt_size;
+ bool nb_registered;
+ struct notifier_block nb;
+ struct vdpa_callback config_cb;
+ struct mlx5_vdpa_wq_ent cvq_ent;
+ struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE];
+ struct mlx5_vdpa_irq_pool irqp;
+ struct dentry *debugfs;
+
+ u32 umem_1_buffer_param_a;
+ u32 umem_1_buffer_param_b;
+
+ u32 umem_2_buffer_param_a;
+ u32 umem_2_buffer_param_b;
+
+ u32 umem_3_buffer_param_a;
+ u32 umem_3_buffer_param_b;
+};
+
+struct mlx5_vdpa_counter {
+ struct mlx5_fc *counter;
+ struct dentry *dent;
+ struct mlx5_core_dev *mdev;
+};
+
+struct macvlan_node {
+ struct hlist_node hlist;
+ struct mlx5_flow_handle *ucast_rule;
+ struct mlx5_flow_handle *mcast_rule;
+ u64 macvlan;
+ struct mlx5_vdpa_net *ndev;
+ bool tagged;
+#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
+ struct dentry *dent;
+ struct mlx5_vdpa_counter ucast_counter;
+ struct mlx5_vdpa_counter mcast_counter;
+#endif
+};
+
+void mlx5_vdpa_add_debugfs(struct mlx5_vdpa_net *ndev);
+void mlx5_vdpa_remove_debugfs(struct mlx5_vdpa_net *ndev);
+void mlx5_vdpa_add_rx_flow_table(struct mlx5_vdpa_net *ndev);
+void mlx5_vdpa_remove_rx_flow_table(struct mlx5_vdpa_net *ndev);
+void mlx5_vdpa_add_tirn(struct mlx5_vdpa_net *ndev);
+void mlx5_vdpa_remove_tirn(struct mlx5_vdpa_net *ndev);
+#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
+void mlx5_vdpa_add_rx_counters(struct mlx5_vdpa_net *ndev,
+ struct macvlan_node *node);
+void mlx5_vdpa_remove_rx_counters(struct mlx5_vdpa_net *ndev,
+ struct macvlan_node *node);
+#else
+static inline void mlx5_vdpa_add_rx_counters(struct mlx5_vdpa_net *ndev,
+ struct macvlan_node *node) {}
+static inline void mlx5_vdpa_remove_rx_counters(struct mlx5_vdpa_net *ndev,
+ struct macvlan_node *node) {}
+#endif
+
+
+#endif /* __MLX5_VNET_H__ */