diff options
Diffstat (limited to 'drivers/vdpa/mlx5')
| -rw-r--r-- | drivers/vdpa/mlx5/Makefile | 2 | ||||
| -rw-r--r-- | drivers/vdpa/mlx5/core/mlx5_vdpa.h | 79 | ||||
| -rw-r--r-- | drivers/vdpa/mlx5/core/mr.c | 533 | ||||
| -rw-r--r-- | drivers/vdpa/mlx5/core/resources.c | 79 | ||||
| -rw-r--r-- | drivers/vdpa/mlx5/net/debug.c | 153 | ||||
| -rw-r--r-- | drivers/vdpa/mlx5/net/mlx5_vnet.c | 1734 | ||||
| -rw-r--r-- | drivers/vdpa/mlx5/net/mlx5_vnet.h | 119 |
7 files changed, 2148 insertions, 551 deletions
diff --git a/drivers/vdpa/mlx5/Makefile b/drivers/vdpa/mlx5/Makefile index f717978c83bf..e791394c33e3 100644 --- a/drivers/vdpa/mlx5/Makefile +++ b/drivers/vdpa/mlx5/Makefile @@ -1,4 +1,4 @@ subdir-ccflags-y += -I$(srctree)/drivers/vdpa/mlx5/core obj-$(CONFIG_MLX5_VDPA_NET) += mlx5_vdpa.o -mlx5_vdpa-$(CONFIG_MLX5_VDPA_NET) += net/mlx5_vnet.o core/resources.o core/mr.o +mlx5_vdpa-$(CONFIG_MLX5_VDPA_NET) += net/mlx5_vnet.o core/resources.o core/mr.o net/debug.o diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index 058fbe28107e..2cedf7e2dbc4 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -31,11 +31,13 @@ struct mlx5_vdpa_mr { struct list_head head; unsigned long num_directs; unsigned long num_klms; - bool initialized; - /* serialize mkey creation and destruction */ - struct mutex mkey_mtx; + struct vhost_iotlb *iotlb; + bool user_mr; + + refcount_t refcount; + struct list_head mr_list; }; struct mlx5_vdpa_resources { @@ -73,17 +75,36 @@ struct mlx5_vdpa_wq_ent { enum { MLX5_VDPA_DATAVQ_GROUP, MLX5_VDPA_CVQ_GROUP, + MLX5_VDPA_DATAVQ_DESC_GROUP, MLX5_VDPA_NUMVQ_GROUPS }; enum { - MLX5_VDPA_NUM_AS = MLX5_VDPA_NUMVQ_GROUPS + MLX5_VDPA_NUM_AS = 2 +}; + +struct mlx5_vdpa_mr_resources { + struct mlx5_vdpa_mr *mr[MLX5_VDPA_NUM_AS]; + unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS]; + + /* Pre-deletion mr list */ + struct list_head mr_list_head; + + /* Deferred mr list */ + struct list_head mr_gc_list_head; + struct workqueue_struct *wq_gc; + struct delayed_work gc_dwork_ent; + + struct mutex lock; + + atomic_t shutdown; }; struct mlx5_vdpa_dev { struct vdpa_device vdev; struct mlx5_core_dev *mdev; struct mlx5_vdpa_resources res; + struct mlx5_vdpa_mr_resources mres; u64 mlx_features; u64 actual_features; @@ -92,15 +113,25 @@ struct mlx5_vdpa_dev { u16 max_idx; u32 generation; - struct mlx5_vdpa_mr mr; struct mlx5_control_vq cvq; struct workqueue_struct *wq; - unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS]; + bool suspended; + + struct mlx5_async_ctx async_ctx; +}; + +struct mlx5_vdpa_async_cmd { + int err; + struct mlx5_async_work cb_work; + struct completion cmd_done; + + void *in; + size_t inlen; + + void *out; + size_t outlen; }; -int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid); -int mlx5_vdpa_dealloc_pd(struct mlx5_vdpa_dev *dev, u32 pdn, u16 uid); -int mlx5_vdpa_get_null_mkey(struct mlx5_vdpa_dev *dev, u32 *null_mkey); int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn); void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn); int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn); @@ -115,11 +146,31 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev); int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in, int inlen); int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey); -int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, - bool *change_map, unsigned int asid); -int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, - unsigned int asid); -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev); +struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, + struct vhost_iotlb *iotlb); +int mlx5_vdpa_init_mr_resources(struct mlx5_vdpa_dev *mvdev); +void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev); +void mlx5_vdpa_clean_mrs(struct mlx5_vdpa_dev *mvdev); +void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr); +void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr); +void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr, + unsigned int asid); +int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev, + struct vhost_iotlb *iotlb, + unsigned int asid); +int mlx5_vdpa_create_dma_mr(struct mlx5_vdpa_dev *mvdev); +int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid); +int mlx5_vdpa_exec_async_cmds(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_async_cmd *cmds, + int num_cmds); + +#define mlx5_vdpa_err(__dev, format, ...) \ + dev_err((__dev)->mdev->device, "%s:%d:(pid %d) error: " format, __func__, __LINE__, \ + current->pid, ##__VA_ARGS__) + #define mlx5_vdpa_warn(__dev, format, ...) \ dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__, \ diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 0a1e0b0dc37e..8870a7169267 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -49,17 +49,23 @@ static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt) } } -static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) +struct mlx5_create_mkey_mem { + u8 out[MLX5_ST_SZ_BYTES(create_mkey_out)]; + u8 in[MLX5_ST_SZ_BYTES(create_mkey_in)]; + __be64 mtt[]; +}; + +struct mlx5_destroy_mkey_mem { + u8 out[MLX5_ST_SZ_BYTES(destroy_mkey_out)]; + u8 in[MLX5_ST_SZ_BYTES(destroy_mkey_in)]; +}; + +static void fill_create_direct_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_direct_mr *mr, + struct mlx5_create_mkey_mem *mem) { - int inlen; + void *in = &mem->in; void *mkc; - void *in; - int err; - - inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); @@ -76,18 +82,36 @@ static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct MLX5_SET(create_mkey_in, in, translations_octword_actual_size, get_octo_len(mr->end - mr->start, mr->log_size)); populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt)); - err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen); - kvfree(in); - if (err) { - mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n"); - return err; - } - return 0; + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); + MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); +} + +static void create_direct_mr_end(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_direct_mr *mr, + struct mlx5_create_mkey_mem *mem) +{ + u32 mkey_index = MLX5_GET(create_mkey_out, mem->out, mkey_index); + + mr->mr = mlx5_idx_to_mkey(mkey_index); +} + +static void fill_destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_direct_mr *mr, + struct mlx5_destroy_mkey_mem *mem) +{ + void *in = &mem->in; + + MLX5_SET(destroy_mkey_in, in, uid, mvdev->res.uid); + MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mr->mr)); } static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) { + if (!mr->mr) + return; + mlx5_vdpa_destroy_mkey(mvdev, mr->mr); } @@ -166,9 +190,12 @@ again: klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start)); preve = dmr->end; } else { + u64 bcount = min_t(u64, dmr->start - preve, MAX_KLM_SIZE); + klm->key = cpu_to_be32(mvdev->res.null_mkey); - klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve)); - preve = dmr->start; + klm->bcount = cpu_to_be32(klm_bcount(bcount)); + preve += bcount; + goto again; } } @@ -179,6 +206,123 @@ static int klm_byte_size(int nklms) return 16 * ALIGN(nklms, 4); } +#define MLX5_VDPA_MTT_ALIGN 16 + +static int create_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) +{ + struct mlx5_vdpa_async_cmd *cmds; + struct mlx5_vdpa_direct_mr *dmr; + int err = 0; + int i = 0; + + cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL); + if (!cmds) + return -ENOMEM; + + list_for_each_entry(dmr, &mr->head, list) { + struct mlx5_create_mkey_mem *cmd_mem; + int mttlen, mttcount; + + mttlen = roundup(MLX5_ST_SZ_BYTES(mtt) * dmr->nsg, MLX5_VDPA_MTT_ALIGN); + mttcount = mttlen / sizeof(cmd_mem->mtt[0]); + cmd_mem = kvcalloc(1, struct_size(cmd_mem, mtt, mttcount), GFP_KERNEL); + if (!cmd_mem) { + err = -ENOMEM; + goto done; + } + + cmds[i].out = cmd_mem->out; + cmds[i].outlen = sizeof(cmd_mem->out); + cmds[i].in = cmd_mem->in; + cmds[i].inlen = struct_size(cmd_mem, mtt, mttcount); + + fill_create_direct_mr(mvdev, dmr, cmd_mem); + + i++; + } + + err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs); + if (err) { + + mlx5_vdpa_err(mvdev, "error issuing MTT mkey creation for direct mrs: %d\n", err); + goto done; + } + + i = 0; + list_for_each_entry(dmr, &mr->head, list) { + struct mlx5_vdpa_async_cmd *cmd = &cmds[i++]; + struct mlx5_create_mkey_mem *cmd_mem; + + cmd_mem = container_of(cmd->out, struct mlx5_create_mkey_mem, out); + + if (!cmd->err) { + create_direct_mr_end(mvdev, dmr, cmd_mem); + } else { + err = err ? err : cmd->err; + mlx5_vdpa_err(mvdev, "error creating MTT mkey [0x%llx, 0x%llx]: %d\n", + dmr->start, dmr->end, cmd->err); + } + } + +done: + for (i = i-1; i >= 0; i--) { + struct mlx5_create_mkey_mem *cmd_mem; + + cmd_mem = container_of(cmds[i].out, struct mlx5_create_mkey_mem, out); + kvfree(cmd_mem); + } + + kvfree(cmds); + return err; +} + +DEFINE_FREE(free_cmds, struct mlx5_vdpa_async_cmd *, kvfree(_T)) +DEFINE_FREE(free_cmd_mem, struct mlx5_destroy_mkey_mem *, kvfree(_T)) + +static int destroy_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) +{ + struct mlx5_destroy_mkey_mem *cmd_mem __free(free_cmd_mem) = NULL; + struct mlx5_vdpa_async_cmd *cmds __free(free_cmds) = NULL; + struct mlx5_vdpa_direct_mr *dmr; + int err = 0; + int i = 0; + + cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL); + cmd_mem = kvcalloc(mr->num_directs, sizeof(*cmd_mem), GFP_KERNEL); + if (!cmds || !cmd_mem) + return -ENOMEM; + + list_for_each_entry(dmr, &mr->head, list) { + cmds[i].out = cmd_mem[i].out; + cmds[i].outlen = sizeof(cmd_mem[i].out); + cmds[i].in = cmd_mem[i].in; + cmds[i].inlen = sizeof(cmd_mem[i].in); + fill_destroy_direct_mr(mvdev, dmr, &cmd_mem[i]); + i++; + } + + err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs); + if (err) { + + mlx5_vdpa_err(mvdev, "error issuing MTT mkey deletion for direct mrs: %d\n", err); + return err; + } + + i = 0; + list_for_each_entry(dmr, &mr->head, list) { + struct mlx5_vdpa_async_cmd *cmd = &cmds[i++]; + + dmr->mr = 0; + if (cmd->err) { + err = err ? err : cmd->err; + mlx5_vdpa_err(mvdev, "error deleting MTT mkey [0x%llx, 0x%llx]: %d\n", + dmr->start, dmr->end, cmd->err); + } + } + + return err; +} + static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) { int inlen; @@ -227,21 +371,19 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr unsigned long lgcd = 0; int log_entity_size; unsigned long size; - u64 start = 0; int err; struct page *pg; unsigned int nsg; int sglen; - u64 pa; + u64 pa, offset; u64 paend; struct scatterlist *sg; - struct device *dma = mvdev->vdev.dma_dev; + struct device *dma = mvdev->vdev.vmap.dma_dev; for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1); - map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) { + map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) { size = maplen(map, mr); lgcd = gcd(lgcd, size); - start += size; } log_entity_size = ilog2(lgcd); @@ -255,8 +397,10 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr sg = mr->sg_head.sgl; for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1); map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) { - paend = map->addr + maplen(map, mr); - for (pa = map->addr; pa < paend; pa += sglen) { + offset = mr->start > map->start ? mr->start - map->start : 0; + pa = map->addr + offset; + paend = map->addr + offset + maplen(map, mr); + for (; pa < paend; pa += sglen) { pg = pfn_to_page(__phys_to_pfn(pa)); if (!sg) { mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n", @@ -279,14 +423,8 @@ done: goto err_map; } - err = create_direct_mr(mvdev, mr); - if (err) - goto err_direct; - return 0; -err_direct: - dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); err_map: sg_free_table(&mr->sg_head); return err; @@ -294,17 +432,20 @@ err_map: static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) { - struct device *dma = mvdev->vdev.dma_dev; + struct device *dma = mvdev->vdev.vmap.dma_dev; destroy_direct_mr(mvdev, mr); dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); sg_free_table(&mr->sg_head); } -static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 perm, +static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr, + u64 start, + u64 size, + u8 perm, struct vhost_iotlb *iotlb) { - struct mlx5_vdpa_mr *mr = &mvdev->mr; struct mlx5_vdpa_direct_mr *dmr; struct mlx5_vdpa_direct_mr *n; LIST_HEAD(tmp); @@ -354,9 +495,10 @@ err_alloc: * indirect memory key that provides access to the enitre address space given * by iotlb. */ -static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +static int create_user_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr, + struct vhost_iotlb *iotlb) { - struct mlx5_vdpa_mr *mr = &mvdev->mr; struct mlx5_vdpa_direct_mr *dmr; struct mlx5_vdpa_direct_mr *n; struct vhost_iotlb_map *map; @@ -384,7 +526,7 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb LOG_MAX_KLM_SIZE); mr->num_klms += nnuls; } - err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb); + err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb); if (err) goto err_chain; } @@ -393,7 +535,11 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb pperm = map->perm; } } - err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb); + err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb); + if (err) + goto err_chain; + + err = create_direct_keys(mvdev, mr); if (err) goto err_chain; @@ -450,20 +596,23 @@ static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) mlx5_vdpa_destroy_mkey(mvdev, mr->mkey); } -static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src) +static int dup_iotlb(struct vhost_iotlb *dst, struct vhost_iotlb *src) { struct vhost_iotlb_map *map; u64 start = 0, last = ULLONG_MAX; int err; + if (dst == src) + return -EINVAL; + if (!src) { - err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW); + err = vhost_iotlb_add_range(dst, start, last, start, VHOST_ACCESS_RW); return err; } for (map = vhost_iotlb_itree_first(src, start, last); map; map = vhost_iotlb_itree_next(map, start, last)) { - err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last, + err = vhost_iotlb_add_range(dst, map->start, map->last, map->addr, map->perm); if (err) return err; @@ -471,9 +620,9 @@ static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src) return 0; } -static void prune_iotlb(struct mlx5_vdpa_dev *mvdev) +static void prune_iotlb(struct vhost_iotlb *iotlb) { - vhost_iotlb_del_range(mvdev->cvq.iotlb, 0, ULLONG_MAX); + vhost_iotlb_del_range(iotlb, 0, ULLONG_MAX); } static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) @@ -482,6 +631,7 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr struct mlx5_vdpa_direct_mr *n; destroy_indirect_key(mvdev, mr); + destroy_direct_keys(mvdev, mr); list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) { list_del_init(&dmr->list); unmap_direct_mr(mvdev, dmr); @@ -489,91 +639,282 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr } } -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) +static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) { - struct mlx5_vdpa_mr *mr = &mvdev->mr; + if (WARN_ON(!mr)) + return; - mutex_lock(&mr->mkey_mtx); - if (!mr->initialized) - goto out; - - prune_iotlb(mvdev); if (mr->user_mr) destroy_user_mr(mvdev, mr); else destroy_dma_mr(mvdev, mr); - memset(mr, 0, sizeof(*mr)); - mr->initialized = false; -out: - mutex_unlock(&mr->mkey_mtx); + vhost_iotlb_free(mr->iotlb); + + list_del(&mr->mr_list); + + kfree(mr); +} + +/* There can be multiple .set_map() operations in quick succession. + * This large delay is a simple way to prevent the MR cleanup from blocking + * .set_map() MR creation in this scenario. + */ +#define MLX5_VDPA_MR_GC_TRIGGER_MS 2000 + +static void mlx5_vdpa_mr_gc_handler(struct work_struct *work) +{ + struct mlx5_vdpa_mr_resources *mres; + struct mlx5_vdpa_mr *mr, *tmp; + struct mlx5_vdpa_dev *mvdev; + + mres = container_of(work, struct mlx5_vdpa_mr_resources, gc_dwork_ent.work); + + if (atomic_read(&mres->shutdown)) { + mutex_lock(&mres->lock); + } else if (!mutex_trylock(&mres->lock)) { + queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent, + msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS)); + return; + } + + mvdev = container_of(mres, struct mlx5_vdpa_dev, mres); + + list_for_each_entry_safe(mr, tmp, &mres->mr_gc_list_head, mr_list) { + _mlx5_vdpa_destroy_mr(mvdev, mr); + } + + mutex_unlock(&mres->lock); +} + +static void _mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr) +{ + struct mlx5_vdpa_mr_resources *mres = &mvdev->mres; + + if (!mr) + return; + + if (refcount_dec_and_test(&mr->refcount)) { + list_move_tail(&mr->mr_list, &mres->mr_gc_list_head); + queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent, + msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS)); + } +} + +void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr) +{ + mutex_lock(&mvdev->mres.lock); + _mlx5_vdpa_put_mr(mvdev, mr); + mutex_unlock(&mvdev->mres.lock); +} + +static void _mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr) +{ + if (!mr) + return; + + refcount_inc(&mr->refcount); +} + +void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *mr) +{ + mutex_lock(&mvdev->mres.lock); + _mlx5_vdpa_get_mr(mvdev, mr); + mutex_unlock(&mvdev->mres.lock); +} + +void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_mr *new_mr, + unsigned int asid) +{ + struct mlx5_vdpa_mr *old_mr = mvdev->mres.mr[asid]; + + mutex_lock(&mvdev->mres.lock); + + _mlx5_vdpa_put_mr(mvdev, old_mr); + mvdev->mres.mr[asid] = new_mr; + + mutex_unlock(&mvdev->mres.lock); +} + +static void mlx5_vdpa_show_mr_leaks(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_vdpa_mr *mr; + + mutex_lock(&mvdev->mres.lock); + + list_for_each_entry(mr, &mvdev->mres.mr_list_head, mr_list) { + + mlx5_vdpa_warn(mvdev, "mkey still alive after resource delete: " + "mr: %p, mkey: 0x%x, refcount: %u\n", + mr, mr->mkey, refcount_read(&mr->refcount)); + } + + mutex_unlock(&mvdev->mres.lock); + +} + +void mlx5_vdpa_clean_mrs(struct mlx5_vdpa_dev *mvdev) +{ + if (!mvdev->res.valid) + return; + + for (int i = 0; i < MLX5_VDPA_NUM_AS; i++) + mlx5_vdpa_update_mr(mvdev, NULL, i); + + prune_iotlb(mvdev->cvq.iotlb); + + mlx5_vdpa_show_mr_leaks(mvdev); } static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, - struct vhost_iotlb *iotlb, unsigned int asid) + struct mlx5_vdpa_mr *mr, + struct vhost_iotlb *iotlb) { - struct mlx5_vdpa_mr *mr = &mvdev->mr; int err; - if (mr->initialized) - return 0; + if (iotlb) + err = create_user_mr(mvdev, mr, iotlb); + else + err = create_dma_mr(mvdev, mr); - if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) { - if (iotlb) - err = create_user_mr(mvdev, iotlb); - else - err = create_dma_mr(mvdev, mr); + if (err) + return err; - if (err) - return err; + mr->iotlb = vhost_iotlb_alloc(0, 0); + if (!mr->iotlb) { + err = -ENOMEM; + goto err_mr; } - if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) { - err = dup_iotlb(mvdev, iotlb); - if (err) - goto out_err; - } + err = dup_iotlb(mr->iotlb, iotlb); + if (err) + goto err_iotlb; + + list_add_tail(&mr->mr_list, &mvdev->mres.mr_list_head); - mr->initialized = true; return 0; -out_err: - if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) { - if (iotlb) - destroy_user_mr(mvdev, mr); - else - destroy_dma_mr(mvdev, mr); - } +err_iotlb: + vhost_iotlb_free(mr->iotlb); + +err_mr: + if (iotlb) + destroy_user_mr(mvdev, mr); + else + destroy_dma_mr(mvdev, mr); return err; } -int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, - unsigned int asid) +struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, + struct vhost_iotlb *iotlb) { + struct mlx5_vdpa_mr *mr; int err; - mutex_lock(&mvdev->mr.mkey_mtx); - err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid); - mutex_unlock(&mvdev->mr.mkey_mtx); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mutex_lock(&mvdev->mres.lock); + err = _mlx5_vdpa_create_mr(mvdev, mr, iotlb); + mutex_unlock(&mvdev->mres.lock); + + if (err) + goto out_err; + + refcount_set(&mr->refcount, 1); + + return mr; + +out_err: + kfree(mr); + return ERR_PTR(err); +} + +int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev, + struct vhost_iotlb *iotlb, + unsigned int asid) +{ + int err; + + if (mvdev->mres.group2asid[MLX5_VDPA_CVQ_GROUP] != asid) + return 0; + + spin_lock(&mvdev->cvq.iommu_lock); + + prune_iotlb(mvdev->cvq.iotlb); + err = dup_iotlb(mvdev->cvq.iotlb, iotlb); + + spin_unlock(&mvdev->cvq.iommu_lock); + return err; } -int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, - bool *change_map, unsigned int asid) +int mlx5_vdpa_create_dma_mr(struct mlx5_vdpa_dev *mvdev) { - struct mlx5_vdpa_mr *mr = &mvdev->mr; - int err = 0; + struct mlx5_vdpa_mr *mr; + + mr = mlx5_vdpa_create_mr(mvdev, NULL); + if (IS_ERR(mr)) + return PTR_ERR(mr); + + mlx5_vdpa_update_mr(mvdev, mr, 0); - *change_map = false; - mutex_lock(&mr->mkey_mtx); - if (mr->initialized) { - mlx5_vdpa_info(mvdev, "memory map update\n"); - *change_map = true; + return mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, 0); +} + +int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) +{ + if (asid >= MLX5_VDPA_NUM_AS) + return -EINVAL; + + mlx5_vdpa_update_mr(mvdev, NULL, asid); + + if (asid == 0 && MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { + if (mlx5_vdpa_create_dma_mr(mvdev)) + mlx5_vdpa_warn(mvdev, "create DMA MR failed\n"); + } else { + mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, asid); } - if (!*change_map) - err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid); - mutex_unlock(&mr->mkey_mtx); - return err; + return 0; +} + +int mlx5_vdpa_init_mr_resources(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_vdpa_mr_resources *mres = &mvdev->mres; + + mres->wq_gc = create_singlethread_workqueue("mlx5_vdpa_mr_gc"); + if (!mres->wq_gc) + return -ENOMEM; + + INIT_DELAYED_WORK(&mres->gc_dwork_ent, mlx5_vdpa_mr_gc_handler); + + mutex_init(&mres->lock); + + INIT_LIST_HEAD(&mres->mr_list_head); + INIT_LIST_HEAD(&mres->mr_gc_list_head); + + return 0; +} + +void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_vdpa_mr_resources *mres = &mvdev->mres; + + if (!mres->wq_gc) + return; + + atomic_set(&mres->shutdown, 1); + + flush_delayed_work(&mres->gc_dwork_ent); + destroy_workqueue(mres->wq_gc); + mres->wq_gc = NULL; + mutex_destroy(&mres->lock); } diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c index 9800f9bec225..aeae31d0cefa 100644 --- a/drivers/vdpa/mlx5/core/resources.c +++ b/drivers/vdpa/mlx5/core/resources.c @@ -213,7 +213,7 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in, return err; mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); - *mkey |= mlx5_idx_to_mkey(mkey_index); + *mkey = mlx5_idx_to_mkey(mkey_index); return 0; } @@ -233,6 +233,7 @@ static int init_ctrl_vq(struct mlx5_vdpa_dev *mvdev) if (!mvdev->cvq.iotlb) return -ENOMEM; + spin_lock_init(&mvdev->cvq.iommu_lock); vringh_set_iotlb(&mvdev->cvq.vring, mvdev->cvq.iotlb, &mvdev->cvq.iommu_lock); return 0; @@ -255,7 +256,6 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev) mlx5_vdpa_warn(mvdev, "resources already allocated\n"); return -EINVAL; } - mutex_init(&mvdev->mr.mkey_mtx); res->uar = mlx5_get_uars_page(mdev); if (IS_ERR(res->uar)) { err = PTR_ERR(res->uar); @@ -300,7 +300,6 @@ err_pd: err_uctx: mlx5_put_uars_page(mdev, res->uar); err_uars: - mutex_destroy(&mvdev->mr.mkey_mtx); return err; } @@ -317,6 +316,78 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev) dealloc_pd(mvdev, res->pdn, res->uid); destroy_uctx(mvdev, res->uid); mlx5_put_uars_page(mvdev->mdev, res->uar); - mutex_destroy(&mvdev->mr.mkey_mtx); res->valid = false; } + +static void virtqueue_cmd_callback(int status, struct mlx5_async_work *context) +{ + struct mlx5_vdpa_async_cmd *cmd = + container_of(context, struct mlx5_vdpa_async_cmd, cb_work); + + cmd->err = mlx5_cmd_check(context->ctx->dev, status, cmd->in, cmd->out); + complete(&cmd->cmd_done); +} + +static int issue_async_cmd(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_async_cmd *cmds, + int issued, + int *completed) + +{ + struct mlx5_vdpa_async_cmd *cmd = &cmds[issued]; + int err; + +retry: + err = mlx5_cmd_exec_cb(&mvdev->async_ctx, + cmd->in, cmd->inlen, + cmd->out, cmd->outlen, + virtqueue_cmd_callback, + &cmd->cb_work); + if (err == -EBUSY) { + if (*completed < issued) { + /* Throttled by own commands: wait for oldest completion. */ + wait_for_completion(&cmds[*completed].cmd_done); + (*completed)++; + + goto retry; + } else { + /* Throttled by external commands: switch to sync api. */ + err = mlx5_cmd_exec(mvdev->mdev, + cmd->in, cmd->inlen, + cmd->out, cmd->outlen); + if (!err) + (*completed)++; + } + } + + return err; +} + +int mlx5_vdpa_exec_async_cmds(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_async_cmd *cmds, + int num_cmds) +{ + int completed = 0; + int issued = 0; + int err = 0; + + for (int i = 0; i < num_cmds; i++) + init_completion(&cmds[i].cmd_done); + + while (issued < num_cmds) { + + err = issue_async_cmd(mvdev, cmds, issued, &completed); + if (err) { + mlx5_vdpa_err(mvdev, "error issuing command %d of %d: %d\n", + issued, num_cmds, err); + break; + } + + issued++; + } + + while (completed < issued) + wait_for_completion(&cmds[completed++].cmd_done); + + return err; +} diff --git a/drivers/vdpa/mlx5/net/debug.c b/drivers/vdpa/mlx5/net/debug.c new file mode 100644 index 000000000000..9c85162c19fc --- /dev/null +++ b/drivers/vdpa/mlx5/net/debug.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include <linux/debugfs.h> +#include <linux/mlx5/fs.h> +#include "mlx5_vnet.h" + +static int tirn_show(struct seq_file *file, void *priv) +{ + struct mlx5_vdpa_net *ndev = file->private; + + seq_printf(file, "0x%x\n", ndev->res.tirn); + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(tirn); + +void mlx5_vdpa_remove_tirn(struct mlx5_vdpa_net *ndev) +{ + if (ndev->debugfs) + debugfs_remove(ndev->res.tirn_dent); +} + +void mlx5_vdpa_add_tirn(struct mlx5_vdpa_net *ndev) +{ + ndev->res.tirn_dent = debugfs_create_file("tirn", 0444, ndev->rx_dent, + ndev, &tirn_fops); +} + +static int rx_flow_table_show(struct seq_file *file, void *priv) +{ + struct mlx5_vdpa_net *ndev = file->private; + + seq_printf(file, "0x%x\n", mlx5_flow_table_id(ndev->rxft)); + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(rx_flow_table); + +void mlx5_vdpa_remove_rx_flow_table(struct mlx5_vdpa_net *ndev) +{ + if (ndev->debugfs) + debugfs_remove(ndev->rx_table_dent); +} + +void mlx5_vdpa_add_rx_flow_table(struct mlx5_vdpa_net *ndev) +{ + ndev->rx_table_dent = debugfs_create_file("table_id", 0444, ndev->rx_dent, + ndev, &rx_flow_table_fops); +} + +#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) +static int packets_show(struct seq_file *file, void *priv) +{ + struct mlx5_vdpa_counter *counter = file->private; + u64 packets; + u64 bytes; + int err; + + err = mlx5_fc_query(counter->mdev, counter->counter, &packets, &bytes); + if (err) + return err; + + seq_printf(file, "0x%llx\n", packets); + return 0; +} + +static int bytes_show(struct seq_file *file, void *priv) +{ + struct mlx5_vdpa_counter *counter = file->private; + u64 packets; + u64 bytes; + int err; + + err = mlx5_fc_query(counter->mdev, counter->counter, &packets, &bytes); + if (err) + return err; + + seq_printf(file, "0x%llx\n", bytes); + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(packets); +DEFINE_SHOW_ATTRIBUTE(bytes); + +static void add_counter_node(struct mlx5_vdpa_counter *counter, + struct dentry *parent) +{ + debugfs_create_file("packets", 0444, parent, counter, + &packets_fops); + debugfs_create_file("bytes", 0444, parent, counter, + &bytes_fops); +} + +void mlx5_vdpa_add_rx_counters(struct mlx5_vdpa_net *ndev, + struct macvlan_node *node) +{ + static const char *ut = "untagged"; + char vidstr[9]; + u16 vid; + + node->ucast_counter.mdev = ndev->mvdev.mdev; + node->mcast_counter.mdev = ndev->mvdev.mdev; + if (node->tagged) { + vid = key2vid(node->macvlan); + snprintf(vidstr, sizeof(vidstr), "0x%x", vid); + } else { + strcpy(vidstr, ut); + } + + node->dent = debugfs_create_dir(vidstr, ndev->rx_dent); + if (IS_ERR(node->dent)) { + node->dent = NULL; + return; + } + + node->ucast_counter.dent = debugfs_create_dir("ucast", node->dent); + if (IS_ERR(node->ucast_counter.dent)) + return; + + add_counter_node(&node->ucast_counter, node->ucast_counter.dent); + + node->mcast_counter.dent = debugfs_create_dir("mcast", node->dent); + if (IS_ERR(node->mcast_counter.dent)) + return; + + add_counter_node(&node->mcast_counter, node->mcast_counter.dent); +} + +void mlx5_vdpa_remove_rx_counters(struct mlx5_vdpa_net *ndev, + struct macvlan_node *node) +{ + if (node->dent && ndev->debugfs) + debugfs_remove_recursive(node->dent); +} +#endif + +void mlx5_vdpa_add_debugfs(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_core_dev *mdev; + + mdev = ndev->mvdev.mdev; + ndev->debugfs = debugfs_create_dir(dev_name(&ndev->mvdev.vdev.dev), + mlx5_debugfs_get_dev_root(mdev)); + if (!IS_ERR(ndev->debugfs)) + ndev->rx_dent = debugfs_create_dir("rx", ndev->debugfs); +} + +void mlx5_vdpa_remove_debugfs(struct mlx5_vdpa_net *ndev) +{ + debugfs_remove_recursive(ndev->debugfs); + ndev->debugfs = NULL; +} diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 3a6dbbc6440d..ddaa1366704b 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -7,6 +7,7 @@ #include <uapi/linux/virtio_net.h> #include <uapi/linux/virtio_ids.h> #include <uapi/linux/vdpa.h> +#include <uapi/linux/vhost_types.h> #include <linux/virtio_config.h> #include <linux/auxiliary_bus.h> #include <linux/mlx5/cq.h> @@ -18,15 +19,12 @@ #include <linux/mlx5/mlx5_ifc_vdpa.h> #include <linux/mlx5/mpfs.h> #include "mlx5_vdpa.h" +#include "mlx5_vnet.h" MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox VDPA driver"); MODULE_LICENSE("Dual BSD/GPL"); -#define to_mlx5_vdpa_ndev(__mvdev) \ - container_of(__mvdev, struct mlx5_vdpa_net, mvdev) -#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev) - #define VALID_FEATURES_MASK \ (BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \ BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \ @@ -50,13 +48,17 @@ MODULE_LICENSE("Dual BSD/GPL"); #define MLX5V_UNTAGGED 0x1000 -struct mlx5_vdpa_net_resources { - u32 tisn; - u32 tdn; - u32 tirn; - u32 rqtn; - bool valid; -}; +/* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section + * 5.1.6.5.5 "Device operation in multiqueue mode": + * + * Multiqueue is disabled by default. + * The driver enables multiqueue by sending a command using class + * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue + * operation, as follows: ... + */ +#define MLX5V_DEFAULT_VQ_COUNT 2 + +#define MLX5V_DEFAULT_VQ_SIZE 256 struct mlx5_vdpa_cq_buf { struct mlx5_frag_buf_ctrl fbc; @@ -94,6 +96,7 @@ struct mlx5_vq_restore_info { u64 driver_addr; u16 avail_index; u16 used_index; + struct msi_map map; bool ready; bool restore; }; @@ -130,6 +133,13 @@ struct mlx5_vdpa_virtqueue { u16 used_idx; int fw_state; + u64 modified_fields; + + struct mlx5_vdpa_mr *vq_mr; + struct mlx5_vdpa_mr *desc_mr; + + struct msi_map map; + /* keep last in the struct */ struct mlx5_vq_restore_info ri; }; @@ -146,47 +156,14 @@ static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx) return idx <= mvdev->max_idx; } -#define MLX5V_MACVLAN_SIZE 256 - -struct mlx5_vdpa_net { - struct mlx5_vdpa_dev mvdev; - struct mlx5_vdpa_net_resources res; - struct virtio_net_config config; - struct mlx5_vdpa_virtqueue *vqs; - struct vdpa_callback *event_cbs; - - /* Serialize vq resources creation and destruction. This is required - * since memory map might change and we need to destroy and create - * resources while driver in operational. - */ - struct rw_semaphore reslock; - struct mlx5_flow_table *rxft; - bool setup; - u32 cur_num_vqs; - u32 rqt_size; - bool nb_registered; - struct notifier_block nb; - struct vdpa_callback config_cb; - struct mlx5_vdpa_wq_ent cvq_ent; - struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE]; -}; - -struct macvlan_node { - struct hlist_node hlist; - struct mlx5_flow_handle *ucast_rule; - struct mlx5_flow_handle *mcast_rule; - u64 macvlan; -}; - -static void free_resources(struct mlx5_vdpa_net *ndev); -static void init_mvqs(struct mlx5_vdpa_net *ndev); -static int setup_driver(struct mlx5_vdpa_dev *mvdev); -static void teardown_driver(struct mlx5_vdpa_net *ndev); +static void free_fixed_resources(struct mlx5_vdpa_net *ndev); +static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev); +static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled); +static void teardown_vq_resources(struct mlx5_vdpa_net *ndev); +static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq); static bool mlx5_vdpa_debug; -#define MLX5_CVQ_MAX_ENT 16 - #define MLX5_LOG_VIO_FLAG(_feature) \ do { \ if (features & BIT_ULL(_feature)) \ @@ -596,6 +573,8 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) vcq->mcq.set_ci_db = vcq->db.db; vcq->mcq.arm_db = vcq->db.db + 1; vcq->mcq.cqe_sz = 64; + vcq->mcq.comp = mlx5_vdpa_cq_comp; + vcq->cqe = num_ent; err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent); if (err) @@ -621,7 +600,7 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) /* Use vector 0 by default. Consider adding code to choose least used * vector. */ - err = mlx5_vector2eqn(mdev, 0, &eqn); + err = mlx5_comp_eqn_get(mdev, 0, &eqn); if (err) goto err_vec; @@ -635,10 +614,6 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) if (err) goto err_vec; - vcq->mcq.comp = mlx5_vdpa_cq_comp; - vcq->cqe = num_ent; - vcq->mcq.set_ci_db = vcq->db.db; - vcq->mcq.arm_db = vcq->db.db + 1; mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); kfree(in); return 0; @@ -666,30 +641,70 @@ static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx) mlx5_db_free(ndev->mvdev.mdev, &vcq->db); } +static int read_umem_params(struct mlx5_vdpa_net *ndev) +{ + u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; + u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01); + struct mlx5_core_dev *mdev = ndev->mvdev.mdev; + int out_size; + void *caps; + void *out; + int err; + + out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); + out = kzalloc(out_size, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, op_mod, opmod); + err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out); + if (err) { + mlx5_vdpa_warn(&ndev->mvdev, + "Failed reading vdpa umem capabilities with err %d\n", err); + goto out; + } + + caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability); + + ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a); + ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b); + + ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a); + ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b); + + ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a); + ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b); + +out: + kfree(out); + return 0; +} + static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num, struct mlx5_vdpa_umem **umemp) { - struct mlx5_core_dev *mdev = ndev->mvdev.mdev; - int p_a; - int p_b; + u32 p_a; + u32 p_b; switch (num) { case 1: - p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a); - p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b); + p_a = ndev->umem_1_buffer_param_a; + p_b = ndev->umem_1_buffer_param_b; *umemp = &mvq->umem1; break; case 2: - p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a); - p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b); + p_a = ndev->umem_2_buffer_param_a; + p_b = ndev->umem_2_buffer_param_b; *umemp = &mvq->umem2; break; case 3: - p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a); - p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b); + p_a = ndev->umem_3_buffer_param_a; + p_b = ndev->umem_3_buffer_param_b; *umemp = &mvq->umem3; break; } + (*umemp)->size = p_a * mvq->num_ent + p_b; } @@ -821,12 +836,28 @@ static bool vq_is_tx(u16 idx) return idx % 2; } -static u16 get_features_12_3(u64 features) +enum { + MLX5_VIRTIO_NET_F_MRG_RXBUF = 2, + MLX5_VIRTIO_NET_F_HOST_ECN = 4, + MLX5_VIRTIO_NET_F_GUEST_ECN = 6, + MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7, + MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8, + MLX5_VIRTIO_NET_F_GUEST_CSUM = 9, + MLX5_VIRTIO_NET_F_CSUM = 10, + MLX5_VIRTIO_NET_F_HOST_TSO6 = 11, + MLX5_VIRTIO_NET_F_HOST_TSO4 = 12, +}; + +static u16 get_features(u64 features) { - return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) | - (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) | - (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) | - (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6); + return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) | + (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) | + (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) | + (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) | + (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) | + (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) | + (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) | + (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4); } static bool counters_supported(const struct mlx5_vdpa_dev *mvdev) @@ -835,11 +866,25 @@ static bool counters_supported(const struct mlx5_vdpa_dev *mvdev) BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); } -static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev) +{ + return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) & + (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) && + pci_msix_can_alloc_dyn(mvdev->mdev->pdev); +} + +static int create_virtqueue(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq, + bool filled) { int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in); u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {}; + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; + struct mlx5_vdpa_mr *vq_mr; + struct mlx5_vdpa_mr *vq_desc_mr; + u64 features = filled ? mvdev->actual_features : mvdev->mlx_features; void *obj_context; + u16 mlx_features; void *cmd_hdr; void *vq_ctx; void *in; @@ -855,6 +900,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque goto err_alloc; } + mlx_features = get_features(features); cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); @@ -862,26 +908,58 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context); - MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); - MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3, - get_features_12_3(ndev->mvdev.actual_features)); + mlx_features >> 3); + MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0, + mlx_features & 7); vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev)); if (vq_is_tx(mvq->index)) MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn); - MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE); + if (mvq->map.virq) { + MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE); + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index); + } else { + MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE); + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn); + } + MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index); - MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn); MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent); MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, - !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1))); - MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); - MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); - MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); - MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey); + !!(features & BIT_ULL(VIRTIO_F_VERSION_1))); + + if (filled) { + MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); + MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); + + MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); + MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); + MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); + + vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]]; + if (vq_mr) + MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey); + + vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; + if (vq_desc_mr && + MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) + MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey); + } else { + /* If there is no mr update, make sure that the existing ones are set + * modify to ready. + */ + vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]]; + if (vq_mr) + mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY; + + vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; + if (vq_desc_mr) + mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY; + } + MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id); MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size); MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id); @@ -900,6 +978,17 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque kfree(in); mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + if (filled) { + mlx5_vdpa_get_mr(mvdev, vq_mr); + mvq->vq_mr = vq_mr; + + if (vq_desc_mr && + MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) { + mlx5_vdpa_get_mr(mvdev, vq_desc_mr); + mvq->desc_mr = vq_desc_mr; + } + } + return 0; err_cmd: @@ -926,6 +1015,12 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq } mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; umems_destroy(ndev, mvq); + + mlx5_vdpa_put_mr(&ndev->mvdev, mvq->vq_mr); + mvq->vq_mr = NULL; + + mlx5_vdpa_put_mr(&ndev->mvdev, mvq->desc_mr); + mvq->desc_mr = NULL; } static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) @@ -1087,44 +1182,101 @@ struct mlx5_virtq_attr { u16 used_index; }; -static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, - struct mlx5_virtq_attr *attr) -{ - int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out); - u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {}; - void *out; - void *obj_context; - void *cmd_hdr; - int err; +struct mlx5_virtqueue_query_mem { + u8 in[MLX5_ST_SZ_BYTES(query_virtio_net_q_in)]; + u8 out[MLX5_ST_SZ_BYTES(query_virtio_net_q_out)]; +}; - out = kzalloc(outlen, GFP_KERNEL); - if (!out) - return -ENOMEM; +struct mlx5_virtqueue_modify_mem { + u8 in[MLX5_ST_SZ_BYTES(modify_virtio_net_q_in)]; + u8 out[MLX5_ST_SZ_BYTES(modify_virtio_net_q_out)]; +}; - cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr); +static void fill_query_virtqueue_cmd(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq, + struct mlx5_virtqueue_query_mem *cmd) +{ + void *cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); - err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen); - if (err) - goto err_cmd; +} + +static void query_virtqueue_end(struct mlx5_vdpa_net *ndev, + struct mlx5_virtqueue_query_mem *cmd, + struct mlx5_virtq_attr *attr) +{ + void *obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, cmd->out, obj_context); - obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context); memset(attr, 0, sizeof(*attr)); attr->state = MLX5_GET(virtio_net_q_object, obj_context, state); attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index); attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index); - kfree(out); - return 0; +} -err_cmd: - kfree(out); +static int query_virtqueues(struct mlx5_vdpa_net *ndev, + int start_vq, + int num_vqs, + struct mlx5_virtq_attr *attrs) +{ + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; + struct mlx5_virtqueue_query_mem *cmd_mem; + struct mlx5_vdpa_async_cmd *cmds; + int err = 0; + + WARN(start_vq + num_vqs > mvdev->max_vqs, "query vq range invalid [%d, %d), max_vqs: %u\n", + start_vq, start_vq + num_vqs, mvdev->max_vqs); + + cmds = kvcalloc(num_vqs, sizeof(*cmds), GFP_KERNEL); + cmd_mem = kvcalloc(num_vqs, sizeof(*cmd_mem), GFP_KERNEL); + if (!cmds || !cmd_mem) { + err = -ENOMEM; + goto done; + } + + for (int i = 0; i < num_vqs; i++) { + cmds[i].in = &cmd_mem[i].in; + cmds[i].inlen = sizeof(cmd_mem[i].in); + cmds[i].out = &cmd_mem[i].out; + cmds[i].outlen = sizeof(cmd_mem[i].out); + fill_query_virtqueue_cmd(ndev, &ndev->vqs[start_vq + i], &cmd_mem[i]); + } + + err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs); + if (err) { + mlx5_vdpa_err(mvdev, "error issuing query cmd for vq range [%d, %d): %d\n", + start_vq, start_vq + num_vqs, err); + goto done; + } + + for (int i = 0; i < num_vqs; i++) { + struct mlx5_vdpa_async_cmd *cmd = &cmds[i]; + int vq_idx = start_vq + i; + + if (cmd->err) { + mlx5_vdpa_err(mvdev, "query vq %d failed, err: %d\n", vq_idx, cmd->err); + if (!err) + err = cmd->err; + continue; + } + + query_virtqueue_end(ndev, &cmd_mem[i], &attrs[i]); + } + +done: + kvfree(cmd_mem); + kvfree(cmds); return err; } -static bool is_valid_state_change(int oldstate, int newstate) +static bool is_resumable(struct mlx5_vdpa_net *ndev) +{ + return ndev->mvdev.vdev.config->resume; +} + +static bool is_valid_state_change(int oldstate, int newstate, bool resumable) { switch (oldstate) { case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT: @@ -1132,48 +1284,122 @@ static bool is_valid_state_change(int oldstate, int newstate) case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY: return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND; case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND: + return resumable ? newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY : false; case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR: default: return false; } } -static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state) +static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq) { - int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in); - u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {}; - void *obj_context; - void *cmd_hdr; - void *in; - int err; + /* Only state is always modifiable */ + if (mvq->modified_fields & ~MLX5_VIRTQ_MODIFY_MASK_STATE) + return mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT || + mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND; - if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE) - return 0; - - if (!is_valid_state_change(mvq->fw_state, state)) - return -EINVAL; + return true; +} - in = kzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; +static void fill_modify_virtqueue_cmd(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq, + int state, + struct mlx5_virtqueue_modify_mem *cmd) +{ + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; + struct mlx5_vdpa_mr *desc_mr = NULL; + struct mlx5_vdpa_mr *vq_mr = NULL; + void *obj_context; + void *cmd_hdr; + void *vq_ctx; - cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr); + cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); - obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context); - MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, - MLX5_VIRTQ_MODIFY_MASK_STATE); - MLX5_SET(virtio_net_q_object, obj_context, state, state); - err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); - kfree(in); - if (!err) + obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, obj_context); + vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) + MLX5_SET(virtio_net_q_object, obj_context, state, state); + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) { + MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); + MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); + MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); + } + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX) + MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX) + MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION) + MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, + !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1))); + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES) { + u16 mlx_features = get_features(ndev->mvdev.actual_features); + + MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3, + mlx_features >> 3); + MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0, + mlx_features & 7); + } + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) { + vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]]; + + if (vq_mr) + MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey); + else + mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY; + } + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) { + desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; + + if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) + MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey); + else + mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY; + } + + MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields); +} + +static void modify_virtqueue_end(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq, + int state) +{ + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) { + unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]; + struct mlx5_vdpa_mr *vq_mr = mvdev->mres.mr[asid]; + + mlx5_vdpa_put_mr(mvdev, mvq->vq_mr); + mlx5_vdpa_get_mr(mvdev, vq_mr); + mvq->vq_mr = vq_mr; + } + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) { + unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]; + struct mlx5_vdpa_mr *desc_mr = mvdev->mres.mr[asid]; + + mlx5_vdpa_put_mr(mvdev, mvq->desc_mr); + mlx5_vdpa_get_mr(mvdev, desc_mr); + mvq->desc_mr = desc_mr; + } + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) mvq->fw_state = state; - return err; + mvq->modified_fields = 0; } static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) @@ -1217,13 +1443,62 @@ static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_vir mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id); } -static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv) { - u16 idx = mvq->index; + struct vdpa_callback *cb = priv; + + if (cb->callback) + return cb->callback(cb->private); + + return IRQ_HANDLED; +} + +static void alloc_vector(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq) +{ + struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp; + struct mlx5_vdpa_irq_pool_entry *ent; int err; + int i; - if (!mvq->num_ent) - return 0; + for (i = 0; i < irqp->num_ent; i++) { + ent = &irqp->entries[i]; + if (!ent->used) { + snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d", + dev_name(&ndev->mvdev.vdev.dev), mvq->index); + ent->dev_id = &ndev->event_cbs[mvq->index]; + err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0, + ent->name, ent->dev_id); + if (err) + return; + + ent->used = true; + mvq->map = ent->map; + return; + } + } +} + +static void dealloc_vector(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq) +{ + struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp; + int i; + + for (i = 0; i < irqp->num_ent; i++) + if (mvq->map.virq == irqp->entries[i].map.virq) { + free_irq(mvq->map.virq, irqp->entries[i].dev_id); + irqp->entries[i].used = false; + return; + } +} + +static int setup_vq(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq, + bool filled) +{ + u16 idx = mvq->index; + int err; if (mvq->initialized) return 0; @@ -1246,27 +1521,29 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) err = counter_set_alloc(ndev, mvq); if (err) - goto err_counter; + goto err_connect; - err = create_virtqueue(ndev, mvq); + alloc_vector(ndev, mvq); + err = create_virtqueue(ndev, mvq, filled); if (err) - goto err_connect; + goto err_vq; + + mvq->initialized = true; if (mvq->ready) { - err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); - if (err) { - mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n", - idx, err); - goto err_connect; - } + err = resume_vq(ndev, mvq); + if (err) + goto err_modify; } - mvq->initialized = true; return 0; -err_connect: +err_modify: + destroy_virtqueue(ndev, mvq); +err_vq: + dealloc_vector(ndev, mvq); counter_set_dealloc(ndev, mvq); -err_counter: +err_connect: qp_destroy(ndev, &mvq->vqqp); err_vqqp: qp_destroy(ndev, &mvq->fwqp); @@ -1275,33 +1552,171 @@ err_fwqp: return err; } -static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +static int modify_virtqueues(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs, int state) { - struct mlx5_virtq_attr attr; + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; + struct mlx5_virtqueue_modify_mem *cmd_mem; + struct mlx5_vdpa_async_cmd *cmds; + int err = 0; + + WARN(start_vq + num_vqs > mvdev->max_vqs, "modify vq range invalid [%d, %d), max_vqs: %u\n", + start_vq, start_vq + num_vqs, mvdev->max_vqs); + + cmds = kvcalloc(num_vqs, sizeof(*cmds), GFP_KERNEL); + cmd_mem = kvcalloc(num_vqs, sizeof(*cmd_mem), GFP_KERNEL); + if (!cmds || !cmd_mem) { + err = -ENOMEM; + goto done; + } + + for (int i = 0; i < num_vqs; i++) { + struct mlx5_vdpa_async_cmd *cmd = &cmds[i]; + struct mlx5_vdpa_virtqueue *mvq; + int vq_idx = start_vq + i; + mvq = &ndev->vqs[vq_idx]; + + if (!modifiable_virtqueue_fields(mvq)) { + err = -EINVAL; + goto done; + } + + if (mvq->fw_state != state) { + if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) { + err = -EINVAL; + goto done; + } + + mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE; + } + + cmd->in = &cmd_mem[i].in; + cmd->inlen = sizeof(cmd_mem[i].in); + cmd->out = &cmd_mem[i].out; + cmd->outlen = sizeof(cmd_mem[i].out); + fill_modify_virtqueue_cmd(ndev, mvq, state, &cmd_mem[i]); + } + + err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs); + if (err) { + mlx5_vdpa_err(mvdev, "error issuing modify cmd for vq range [%d, %d)\n", + start_vq, start_vq + num_vqs); + goto done; + } + + for (int i = 0; i < num_vqs; i++) { + struct mlx5_vdpa_async_cmd *cmd = &cmds[i]; + struct mlx5_vdpa_virtqueue *mvq; + int vq_idx = start_vq + i; + + mvq = &ndev->vqs[vq_idx]; + + if (cmd->err) { + mlx5_vdpa_err(mvdev, "modify vq %d failed, state: %d -> %d, err: %d\n", + vq_idx, mvq->fw_state, state, err); + if (!err) + err = cmd->err; + continue; + } + + modify_virtqueue_end(ndev, mvq, state); + } + +done: + kvfree(cmd_mem); + kvfree(cmds); + return err; +} + +static int suspend_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs) +{ + struct mlx5_vdpa_virtqueue *mvq; + struct mlx5_virtq_attr *attrs; + int vq_idx, i; + int err; + + if (start_vq >= ndev->cur_num_vqs) + return -EINVAL; + + mvq = &ndev->vqs[start_vq]; if (!mvq->initialized) - return; + return 0; if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) - return; + return 0; - if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)) - mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n"); + err = modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND); + if (err) + return err; - if (query_virtqueue(ndev, mvq, &attr)) { - mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n"); - return; + attrs = kcalloc(num_vqs, sizeof(struct mlx5_virtq_attr), GFP_KERNEL); + if (!attrs) + return -ENOMEM; + + err = query_virtqueues(ndev, start_vq, num_vqs, attrs); + if (err) + goto done; + + for (i = 0, vq_idx = start_vq; i < num_vqs; i++, vq_idx++) { + mvq = &ndev->vqs[vq_idx]; + mvq->avail_idx = attrs[i].available_index; + mvq->used_idx = attrs[i].used_index; } - mvq->avail_idx = attr.available_index; - mvq->used_idx = attr.used_index; + +done: + kfree(attrs); + return err; } -static void suspend_vqs(struct mlx5_vdpa_net *ndev) +static int suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) { - int i; + return suspend_vqs(ndev, mvq->index, 1); +} - for (i = 0; i < ndev->mvdev.max_vqs; i++) - suspend_vq(ndev, &ndev->vqs[i]); +static int resume_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs) +{ + struct mlx5_vdpa_virtqueue *mvq; + int err; + + if (start_vq >= ndev->mvdev.max_vqs) + return -EINVAL; + + mvq = &ndev->vqs[start_vq]; + if (!mvq->initialized) + return 0; + + if (mvq->index >= ndev->cur_num_vqs) + return 0; + + switch (mvq->fw_state) { + case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT: + /* Due to a FW quirk we need to modify the VQ fields first then change state. + * This should be fixed soon. After that, a single command can be used. + */ + err = modify_virtqueues(ndev, start_vq, num_vqs, mvq->fw_state); + if (err) + return err; + break; + case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND: + if (!is_resumable(ndev)) { + mlx5_vdpa_warn(&ndev->mvdev, "vq %d is not resumable\n", mvq->index); + return -EINVAL; + } + break; + case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY: + return 0; + default: + mlx5_vdpa_err(&ndev->mvdev, "resume vq %u called from bad state %d\n", + mvq->index, mvq->fw_state); + return -EINVAL; + } + + return modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); +} + +static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + return resume_vqs(ndev, mvq->index, 1); } static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) @@ -1310,7 +1725,9 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue * return; suspend_vq(ndev, mvq); + mvq->modified_fields = 0; destroy_virtqueue(ndev, mvq); + dealloc_vector(ndev, mvq); counter_set_dealloc(ndev, mvq); qp_destroy(ndev, &mvq->vqqp); qp_destroy(ndev, &mvq->fwqp); @@ -1431,36 +1848,85 @@ static int create_tir(struct mlx5_vdpa_net *ndev) err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn); kfree(in); + if (err) + return err; + + mlx5_vdpa_add_tirn(ndev); return err; } static void destroy_tir(struct mlx5_vdpa_net *ndev) { + mlx5_vdpa_remove_tirn(ndev); mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn); } #define MAX_STEERING_ENT 0x8000 #define MAX_STEERING_GROUPS 2 +#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) + #define NUM_DESTS 2 +#else + #define NUM_DESTS 1 +#endif + +static int add_steering_counters(struct mlx5_vdpa_net *ndev, + struct macvlan_node *node, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dests) +{ +#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) + int err; + + node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false); + if (IS_ERR(node->ucast_counter.counter)) + return PTR_ERR(node->ucast_counter.counter); + + node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false); + if (IS_ERR(node->mcast_counter.counter)) { + err = PTR_ERR(node->mcast_counter.counter); + goto err_mcast_counter; + } + + dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + return 0; + +err_mcast_counter: + mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter); + return err; +#else + return 0; +#endif +} + +static void remove_steering_counters(struct mlx5_vdpa_net *ndev, + struct macvlan_node *node) +{ +#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) + mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter); + mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter); +#endif +} + static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, - u16 vid, bool tagged, - struct mlx5_flow_handle **ucast, - struct mlx5_flow_handle **mcast) + struct macvlan_node *node) { - struct mlx5_flow_destination dest = {}; + struct mlx5_flow_destination dests[NUM_DESTS] = {}; struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; void *headers_c; void *headers_v; u8 *dmac_c; u8 *dmac_v; int err; + u16 vid; spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) return -ENOMEM; + vid = key2vid(node->macvlan); spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); @@ -1472,44 +1938,58 @@ static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid); } - if (tagged) { + if (node->tagged) { MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid); } flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dest.tir_num = ndev->res.tirn; - rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1); - if (IS_ERR(rule)) - return PTR_ERR(rule); + dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dests[0].tir_num = ndev->res.tirn; + err = add_steering_counters(ndev, node, &flow_act, dests); + if (err) + goto out_free; + +#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) + dests[1].counter = node->ucast_counter.counter; +#endif + node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS); + if (IS_ERR(node->ucast_rule)) { + err = PTR_ERR(node->ucast_rule); + goto err_ucast; + } - *ucast = rule; +#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) + dests[1].counter = node->mcast_counter.counter; +#endif memset(dmac_c, 0, ETH_ALEN); memset(dmac_v, 0, ETH_ALEN); dmac_c[0] = 1; dmac_v[0] = 1; - rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1); - kvfree(spec); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); + node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS); + if (IS_ERR(node->mcast_rule)) { + err = PTR_ERR(node->mcast_rule); goto err_mcast; } - - *mcast = rule; + kvfree(spec); + mlx5_vdpa_add_rx_counters(ndev, node); return 0; err_mcast: - mlx5_del_flow_rules(*ucast); + mlx5_del_flow_rules(node->ucast_rule); +err_ucast: + remove_steering_counters(ndev, node); +out_free: + kvfree(spec); return err; } static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev, - struct mlx5_flow_handle *ucast, - struct mlx5_flow_handle *mcast) + struct macvlan_node *node) { - mlx5_del_flow_rules(ucast); - mlx5_del_flow_rules(mcast); + mlx5_vdpa_remove_rx_counters(ndev, node); + mlx5_del_flow_rules(node->ucast_rule); + mlx5_del_flow_rules(node->mcast_rule); } static u64 search_val(u8 *mac, u16 vlan, bool tagged) @@ -1543,14 +2023,14 @@ static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 valu return NULL; } -static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) // vlan -> vid +static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged) { struct macvlan_node *ptr; u64 val; u32 idx; int err; - val = search_val(mac, vlan, tagged); + val = search_val(mac, vid, tagged); if (mac_vlan_lookup(ndev, val)) return -EEXIST; @@ -1558,12 +2038,13 @@ static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagg if (!ptr) return -ENOMEM; - err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, vlan, tagged, - &ptr->ucast_rule, &ptr->mcast_rule); + ptr->tagged = tagged; + ptr->macvlan = val; + ptr->ndev = ndev; + err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr); if (err) goto err_add; - ptr->macvlan = val; idx = hash_64(val, 8); hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]); return 0; @@ -1582,7 +2063,8 @@ static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tag return; hlist_del(&ptr->hlist); - mlx5_vdpa_del_mac_vlan_rules(ndev, ptr->ucast_rule, ptr->mcast_rule); + mlx5_vdpa_del_mac_vlan_rules(ndev, ptr); + remove_steering_counters(ndev, ptr); kfree(ptr); } @@ -1595,7 +2077,8 @@ static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev) for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) { hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) { hlist_del(&pos->hlist); - mlx5_vdpa_del_mac_vlan_rules(ndev, pos->ucast_rule, pos->mcast_rule); + mlx5_vdpa_del_mac_vlan_rules(ndev, pos); + remove_steering_counters(ndev, pos); kfree(pos); } } @@ -1612,15 +2095,16 @@ static int setup_steering(struct mlx5_vdpa_net *ndev) ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); if (!ns) { - mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n"); + mlx5_vdpa_err(&ndev->mvdev, "failed to get flow namespace\n"); return -EOPNOTSUPP; } ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); if (IS_ERR(ndev->rxft)) { - mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n"); + mlx5_vdpa_err(&ndev->mvdev, "failed to create flow table\n"); return PTR_ERR(ndev->rxft); } + mlx5_vdpa_add_rx_flow_table(ndev); err = mac_vlan_add(ndev, ndev->config.mac, 0, false); if (err) @@ -1629,6 +2113,7 @@ static int setup_steering(struct mlx5_vdpa_net *ndev) return 0; err_add: + mlx5_vdpa_remove_rx_flow_table(ndev); mlx5_destroy_flow_table(ndev->rxft); return err; } @@ -1636,6 +2121,7 @@ err_add: static void teardown_steering(struct mlx5_vdpa_net *ndev) { clear_mac_vlan_table(ndev); + mlx5_vdpa_remove_rx_flow_table(ndev); mlx5_destroy_flow_table(ndev->rxft); } @@ -1731,37 +2217,48 @@ static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps) { struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); - int cur_qps = ndev->cur_num_vqs / 2; + int cur_vqs = ndev->cur_num_vqs; + int new_vqs = newqps * 2; int err; int i; - if (cur_qps > newqps) { - err = modify_rqt(ndev, 2 * newqps); + if (cur_vqs > new_vqs) { + err = modify_rqt(ndev, new_vqs); if (err) return err; - for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--) - teardown_vq(ndev, &ndev->vqs[i]); + if (is_resumable(ndev)) { + suspend_vqs(ndev, new_vqs, cur_vqs - new_vqs); + } else { + for (i = new_vqs; i < cur_vqs; i++) + teardown_vq(ndev, &ndev->vqs[i]); + } - ndev->cur_num_vqs = 2 * newqps; + ndev->cur_num_vqs = new_vqs; } else { - ndev->cur_num_vqs = 2 * newqps; - for (i = cur_qps * 2; i < 2 * newqps; i++) { - err = setup_vq(ndev, &ndev->vqs[i]); + ndev->cur_num_vqs = new_vqs; + + for (i = cur_vqs; i < new_vqs; i++) { + err = setup_vq(ndev, &ndev->vqs[i], false); if (err) goto clean_added; } - err = modify_rqt(ndev, 2 * newqps); + + err = resume_vqs(ndev, cur_vqs, new_vqs - cur_vqs); + if (err) + goto clean_added; + + err = modify_rqt(ndev, new_vqs); if (err) goto clean_added; } return 0; clean_added: - for (--i; i >= 2 * cur_qps; --i) + for (--i; i >= cur_vqs; --i) teardown_vq(ndev, &ndev->vqs[i]); - ndev->cur_num_vqs = 2 * cur_qps; + ndev->cur_num_vqs = cur_vqs; return err; } @@ -1971,6 +2468,7 @@ static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_ mvq->desc_addr = desc_area; mvq->device_addr = device_area; mvq->driver_addr = driver_area; + mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS; return 0; } @@ -1980,10 +2478,18 @@ static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); struct mlx5_vdpa_virtqueue *mvq; - if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) + if (!is_index_valid(mvdev, idx)) return; + if (is_ctrl_vq_idx(mvdev, idx)) { + struct mlx5_control_vq *cvq = &mvdev->cvq; + + cvq->vring.vring.num = num; + return; + } + mvq = &ndev->vqs[idx]; + ndev->needs_teardown |= num != mvq->num_ent; mvq->num_ent = num; } @@ -2023,7 +2529,6 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); struct mlx5_vdpa_virtqueue *mvq; - int err; if (!mvdev->actual_features) return; @@ -2039,15 +2544,11 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready mvq = &ndev->vqs[idx]; if (!ready) { suspend_vq(ndev, mvq); - } else { - err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); - if (err) { - mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err); + } else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) { + if (resume_vq(ndev, mvq)) ready = false; - } } - mvq->ready = ready; } @@ -2088,6 +2589,8 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, mvq->used_idx = state->split.avail_index; mvq->avail_idx = state->split.avail_index; + mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX | + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX; return 0; } @@ -2121,9 +2624,9 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa return 0; } - err = query_virtqueue(ndev, mvq, &attr); + err = query_virtqueues(ndev, mvq->index, 1, &attr); if (err) { - mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n"); + mlx5_vdpa_err(mvdev, "failed to query virtqueue\n"); return err; } state->split.avail_index = attr.used_index; @@ -2145,23 +2648,37 @@ static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx) return MLX5_VDPA_DATAVQ_GROUP; } -enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9, - MLX5_VIRTIO_NET_F_CSUM = 1 << 10, - MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11, - MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12, -}; +static u32 mlx5_vdpa_get_vq_desc_group(struct vdpa_device *vdev, u16 idx) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + + if (is_ctrl_vq_idx(mvdev, idx)) + return MLX5_VDPA_CVQ_GROUP; + + return MLX5_VDPA_DATAVQ_DESC_GROUP; +} static u64 mlx_to_vritio_features(u16 dev_features) { u64 result = 0; - if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM) + if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF)) + result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF); + if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN)) + result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN); + if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN)) + result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN); + if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6)) + result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6); + if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4)) + result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4); + if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM)) result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM); - if (dev_features & MLX5_VIRTIO_NET_F_CSUM) + if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM)) result |= BIT_ULL(VIRTIO_NET_F_CSUM); - if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6) + if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6)) result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6); - if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4) + if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4)) result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4); return result; @@ -2183,6 +2700,7 @@ static u64 get_supported_features(struct mlx5_core_dev *mdev) mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS); mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU); mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN); + mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC); return mlx_vdpa_features; } @@ -2218,14 +2736,14 @@ static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features) return 0; } -static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) +static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev, bool filled) { struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); int err; int i; for (i = 0; i < mvdev->max_vqs; i++) { - err = setup_vq(ndev, &ndev->vqs[i]); + err = setup_vq(ndev, &ndev->vqs[i], filled); if (err) goto err_vq; } @@ -2241,16 +2759,10 @@ err_vq: static void teardown_virtqueues(struct mlx5_vdpa_net *ndev) { - struct mlx5_vdpa_virtqueue *mvq; int i; - for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) { - mvq = &ndev->vqs[i]; - if (!mvq->initialized) - continue; - - teardown_vq(ndev, mvq); - } + for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) + teardown_vq(ndev, &ndev->vqs[i]); } static void update_cvq_info(struct mlx5_vdpa_dev *mvdev) @@ -2271,10 +2783,127 @@ static void update_cvq_info(struct mlx5_vdpa_dev *mvdev) } } +static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) +{ + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {}; + int err; + + MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE); + MLX5_SET(query_vport_state_in, in, op_mod, opmod); + MLX5_SET(query_vport_state_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_vport_state_in, in, other_vport, 1); + + err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out); + if (err) + return 0; + + return MLX5_GET(query_vport_state_out, out, state); +} + +static bool get_link_state(struct mlx5_vdpa_dev *mvdev) +{ + if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) == + VPORT_STATE_UP) + return true; + + return false; +} + +static void update_carrier(struct work_struct *work) +{ + struct mlx5_vdpa_wq_ent *wqent; + struct mlx5_vdpa_dev *mvdev; + struct mlx5_vdpa_net *ndev; + + wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); + mvdev = wqent->mvdev; + ndev = to_mlx5_vdpa_ndev(mvdev); + if (get_link_state(mvdev)) + ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); + else + ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); + + if (ndev->config_cb.callback) + ndev->config_cb.callback(ndev->config_cb.private); + + kfree(wqent); +} + +static int queue_link_work(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_wq_ent *wqent; + + wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); + if (!wqent) + return -ENOMEM; + + wqent->mvdev = &ndev->mvdev; + INIT_WORK(&wqent->work, update_carrier); + queue_work(ndev->mvdev.wq, &wqent->work); + return 0; +} + +static int event_handler(struct notifier_block *nb, unsigned long event, void *param) +{ + struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb); + struct mlx5_eqe *eqe = param; + int ret = NOTIFY_DONE; + + if (ndev->mvdev.suspended) + return NOTIFY_DONE; + + if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + if (queue_link_work(ndev)) + return NOTIFY_DONE; + + ret = NOTIFY_OK; + break; + default: + return NOTIFY_DONE; + } + return ret; + } + return ret; +} + +static void register_link_notifier(struct mlx5_vdpa_net *ndev) +{ + if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS))) + return; + + ndev->nb.notifier_call = event_handler; + mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb); + ndev->nb_registered = true; + queue_link_work(ndev); +} + +static void unregister_link_notifier(struct mlx5_vdpa_net *ndev) +{ + if (!ndev->nb_registered) + return; + + ndev->nb_registered = false; + mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb); + if (ndev->mvdev.wq) + flush_workqueue(ndev->mvdev.wq); +} + +static u64 mlx5_vdpa_get_backend_features(const struct vdpa_device *vdpa) +{ + return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK); +} + static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + u64 old_features = mvdev->actual_features; + u64 diff_features; int err; print_features(mvdev, features, true); @@ -2284,12 +2913,26 @@ static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) return err; ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; - if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)) - ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs); - else - ndev->rqt_size = 1; - ndev->cur_num_vqs = 2 * ndev->rqt_size; + /* Interested in changes of vq features only. */ + if (get_features(old_features) != get_features(mvdev->actual_features)) { + for (int i = 0; i < mvdev->max_vqs; ++i) { + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[i]; + + mvq->modified_fields |= ( + MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION | + MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES + ); + } + } + + /* When below features diverge from initial device features, VQs need a full teardown. */ +#define NEEDS_TEARDOWN_MASK (BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | \ + BIT_ULL(VIRTIO_NET_F_CSUM) | \ + BIT_ULL(VIRTIO_F_VERSION_1)) + + diff_features = mvdev->mlx_features ^ mvdev->actual_features; + ndev->needs_teardown = !!(diff_features & NEEDS_TEARDOWN_MASK); update_cvq_info(mvdev); return err; @@ -2335,7 +2978,7 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu int err; if (mvq->initialized) { - err = query_virtqueue(ndev, mvq, &attr); + err = query_virtqueues(ndev, mvq->index, 1, &attr); if (err) return err; } @@ -2347,6 +2990,7 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu ri->desc_addr = mvq->desc_addr; ri->device_addr = mvq->device_addr; ri->driver_addr = mvq->driver_addr; + ri->map = mvq->map; ri->restore = true; return 0; } @@ -2377,7 +3021,7 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev) int i; mlx5_clear_vqs(ndev); - init_mvqs(ndev); + mvqs_set_defaults(ndev); for (i = 0; i < ndev->mvdev.max_vqs; i++) { mvq = &ndev->vqs[i]; ri = &mvq->ri; @@ -2391,46 +3035,52 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev) mvq->desc_addr = ri->desc_addr; mvq->device_addr = ri->device_addr; mvq->driver_addr = ri->driver_addr; + mvq->map = ri->map; } } static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, - struct vhost_iotlb *iotlb, unsigned int asid) + struct mlx5_vdpa_mr *new_mr, + unsigned int asid) { struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + bool teardown = !is_resumable(ndev); int err; - suspend_vqs(ndev); - err = save_channels_info(ndev); - if (err) - goto err_mr; + suspend_vqs(ndev, 0, ndev->cur_num_vqs); + if (teardown) { + err = save_channels_info(ndev); + if (err) + return err; - teardown_driver(ndev); - mlx5_vdpa_destroy_mr(mvdev); - err = mlx5_vdpa_create_mr(mvdev, iotlb, asid); - if (err) - goto err_mr; + teardown_vq_resources(ndev); + } - if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) - goto err_mr; + mlx5_vdpa_update_mr(mvdev, new_mr, asid); - restore_channels_info(ndev); - err = setup_driver(mvdev); - if (err) - goto err_setup; + for (int i = 0; i < mvdev->max_vqs; i++) + ndev->vqs[i].modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY | + MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY; - return 0; + if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended) + return 0; -err_setup: - mlx5_vdpa_destroy_mr(mvdev); -err_mr: - return err; + if (teardown) { + restore_channels_info(ndev); + err = setup_vq_resources(ndev, true); + if (err) + return err; + } + + resume_vqs(ndev, 0, ndev->cur_num_vqs); + + return 0; } /* reslock must be held for this function */ -static int setup_driver(struct mlx5_vdpa_dev *mvdev) +static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled) { - struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; int err; WARN_ON(!rwsem_is_locked(&ndev->reslock)); @@ -2440,10 +3090,16 @@ static int setup_driver(struct mlx5_vdpa_dev *mvdev) err = 0; goto out; } - err = setup_virtqueues(mvdev); + mlx5_vdpa_add_debugfs(ndev); + + err = read_umem_params(ndev); + if (err) + goto err_setup; + + err = setup_virtqueues(mvdev, filled); if (err) { mlx5_vdpa_warn(mvdev, "setup_virtqueues\n"); - goto out; + goto err_setup; } err = create_rqt(ndev); @@ -2473,12 +3129,14 @@ err_tir: destroy_rqt(ndev); err_rqt: teardown_virtqueues(ndev); +err_setup: + mlx5_vdpa_remove_debugfs(ndev); out: return err; } /* reslock must be held for this function */ -static void teardown_driver(struct mlx5_vdpa_net *ndev) +static void teardown_vq_resources(struct mlx5_vdpa_net *ndev) { WARN_ON(!rwsem_is_locked(&ndev->reslock)); @@ -2486,21 +3144,13 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev) if (!ndev->setup) return; + mlx5_vdpa_remove_debugfs(ndev); teardown_steering(ndev); destroy_tir(ndev); destroy_rqt(ndev); teardown_virtqueues(ndev); ndev->setup = false; -} - -static void clear_vqs_ready(struct mlx5_vdpa_net *ndev) -{ - int i; - - for (i = 0; i < ndev->mvdev.max_vqs; i++) - ndev->vqs[i].ready = false; - - ndev->mvdev.cvq.ready = false; + ndev->needs_teardown = false; } static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) @@ -2508,13 +3158,18 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) struct mlx5_control_vq *cvq = &mvdev->cvq; int err = 0; - if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) + if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) { + u16 idx = cvq->vring.last_avail_idx; + err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, - MLX5_CVQ_MAX_ENT, false, + cvq->vring.vring.num, false, (struct vring_desc *)(uintptr_t)cvq->desc_addr, (struct vring_avail *)(uintptr_t)cvq->driver_addr, (struct vring_used *)(uintptr_t)cvq->device_addr); + if (!err) + cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx; + } return err; } @@ -2535,10 +3190,23 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n"); goto err_setup; } - err = setup_driver(mvdev); - if (err) { - mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); - goto err_setup; + register_link_notifier(ndev); + + if (ndev->needs_teardown) + teardown_vq_resources(ndev); + + if (ndev->setup) { + err = resume_vqs(ndev, 0, ndev->cur_num_vqs); + if (err) { + mlx5_vdpa_warn(mvdev, "failed to resume VQs\n"); + goto err_driver; + } + } else { + err = setup_vq_resources(ndev, true); + if (err) { + mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); + goto err_driver; + } } } else { mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n"); @@ -2550,8 +3218,10 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) up_write(&ndev->reslock); return; +err_driver: + unregister_link_notifier(ndev); err_setup: - mlx5_vdpa_destroy_mr(&ndev->mvdev); + mlx5_vdpa_clean_mrs(&ndev->mvdev); ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; err_clear: up_write(&ndev->reslock); @@ -2563,23 +3233,51 @@ static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev) /* default mapping all groups are mapped to asid 0 */ for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++) - mvdev->group2asid[i] = 0; + mvdev->mres.group2asid[i] = 0; } -static int mlx5_vdpa_reset(struct vdpa_device *vdev) +static bool needs_vqs_reset(const struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[0]; + + if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) + return true; + + if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT) + return true; + + return mvq->modified_fields & ( + MLX5_VIRTQ_MODIFY_MASK_STATE | + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS | + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX | + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX + ); +} + +static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + bool vq_reset; print_status(mvdev, 0, true); mlx5_vdpa_info(mvdev, "performing device reset\n"); down_write(&ndev->reslock); - teardown_driver(ndev); - clear_vqs_ready(ndev); - mlx5_vdpa_destroy_mr(&ndev->mvdev); + unregister_link_notifier(ndev); + vq_reset = needs_vqs_reset(mvdev); + if (vq_reset) { + teardown_vq_resources(ndev); + mvqs_set_defaults(ndev); + } + + if (flags & VDPA_RESET_F_CLEAN_MAP) + mlx5_vdpa_clean_mrs(&ndev->mvdev); ndev->mvdev.status = 0; - ndev->cur_num_vqs = 0; + ndev->mvdev.suspended = false; + ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT; + ndev->mvdev.cvq.ready = false; ndev->mvdev.cvq.received_desc = 0; ndev->mvdev.cvq.completed_desc = 0; memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1)); @@ -2587,15 +3285,23 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev) init_group_to_asid_map(mvdev); ++mvdev->generation; - if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { - if (mlx5_vdpa_create_mr(mvdev, NULL, 0)) - mlx5_vdpa_warn(mvdev, "create MR failed\n"); + if ((flags & VDPA_RESET_F_CLEAN_MAP) && + MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { + if (mlx5_vdpa_create_dma_mr(mvdev)) + mlx5_vdpa_err(mvdev, "create MR failed\n"); } + if (vq_reset) + setup_vq_resources(ndev, false); up_write(&ndev->reslock); return 0; } +static int mlx5_vdpa_reset(struct vdpa_device *vdev) +{ + return mlx5_vdpa_compat_reset(vdev, 0); +} + static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev) { return sizeof(struct virtio_net_config); @@ -2627,18 +3333,38 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, unsigned int asid) { - bool change_map; + struct mlx5_vdpa_mr *new_mr; int err; - err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map, asid); - if (err) { - mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err); - return err; + if (asid >= MLX5_VDPA_NUM_AS) + return -EINVAL; + + if (vhost_iotlb_itree_first(iotlb, 0, U64_MAX)) { + new_mr = mlx5_vdpa_create_mr(mvdev, iotlb); + if (IS_ERR(new_mr)) { + err = PTR_ERR(new_mr); + mlx5_vdpa_err(mvdev, "create map failed(%d)\n", err); + return err; + } + } else { + /* Empty iotlbs don't have an mr but will clear the previous mr. */ + new_mr = NULL; } - if (change_map) - err = mlx5_vdpa_change_map(mvdev, iotlb, asid); + if (!mvdev->mres.mr[asid]) { + mlx5_vdpa_update_mr(mvdev, new_mr, asid); + } else { + err = mlx5_vdpa_change_map(mvdev, new_mr, asid); + if (err) { + mlx5_vdpa_err(mvdev, "change map failed(%d)\n", err); + goto out_err; + } + } + + return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid); +out_err: + mlx5_vdpa_put_mr(mvdev, new_mr); return err; } @@ -2655,6 +3381,50 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid, return err; } +static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + int err; + + down_write(&ndev->reslock); + err = mlx5_vdpa_reset_mr(mvdev, asid); + up_write(&ndev->reslock); + return err; +} + +static union virtio_map mlx5_get_vq_map(struct vdpa_device *vdev, u16 idx) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + union virtio_map map; + + if (is_ctrl_vq_idx(mvdev, idx)) + map.dma_dev = &vdev->dev; + else + map.dma_dev = mvdev->vdev.vmap.dma_dev; + + return map; +} + +static void free_irqs(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_irq_pool_entry *ent; + int i; + + if (!msix_mode_supported(&ndev->mvdev)) + return; + + if (!ndev->irqp.entries) + return; + + for (i = ndev->irqp.num_ent - 1; i >= 0; i--) { + ent = ndev->irqp.entries + i; + if (ent->map.virq) + pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map); + } + kfree(ndev->irqp.entries); +} + static void mlx5_vdpa_free(struct vdpa_device *vdev) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); @@ -2663,13 +3433,19 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev) ndev = to_mlx5_vdpa_ndev(mvdev); - free_resources(ndev); - mlx5_vdpa_destroy_mr(mvdev); + /* Functions called here should be able to work with + * uninitialized resources. + */ + free_fixed_resources(ndev); + mlx5_vdpa_clean_mrs(mvdev); + mlx5_vdpa_destroy_mr_resources(&ndev->mvdev); if (!is_zero_ether_addr(ndev->config.mac)) { pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); } + mlx5_cmd_cleanup_async_ctx(&mvdev->async_ctx); mlx5_vdpa_free_resources(&ndev->mvdev); + free_irqs(ndev); kfree(ndev->event_cbs); kfree(ndev->vqs); } @@ -2698,9 +3474,23 @@ static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device return ret; } -static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx) +static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx) { - return -EOPNOTSUPP; + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq; + + if (!is_index_valid(mvdev, idx)) + return -EINVAL; + + if (is_ctrl_vq_idx(mvdev, idx)) + return -EOPNOTSUPP; + + mvq = &ndev->vqs[idx]; + if (!mvq->map.virq) + return -EOPNOTSUPP; + + return mvq->map.virq; } static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev) @@ -2812,32 +3602,55 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); - struct mlx5_vdpa_virtqueue *mvq; - int i; + int err; + + mlx5_vdpa_info(mvdev, "suspending device\n"); down_write(&ndev->reslock); - ndev->nb_registered = false; - mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); - flush_workqueue(ndev->mvdev.wq); - for (i = 0; i < ndev->cur_num_vqs; i++) { - mvq = &ndev->vqs[i]; - suspend_vq(ndev, mvq); - } + err = suspend_vqs(ndev, 0, ndev->cur_num_vqs); mlx5_vdpa_cvq_suspend(mvdev); + mvdev->suspended = true; up_write(&ndev->reslock); - return 0; + + return err; +} + +static int mlx5_vdpa_resume(struct vdpa_device *vdev) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev; + int err; + + ndev = to_mlx5_vdpa_ndev(mvdev); + + mlx5_vdpa_info(mvdev, "resuming device\n"); + + down_write(&ndev->reslock); + mvdev->suspended = false; + err = resume_vqs(ndev, 0, ndev->cur_num_vqs); + queue_link_work(ndev); + up_write(&ndev->reslock); + + return err; } static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group, unsigned int asid) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + int err = 0; if (group >= MLX5_VDPA_NUMVQ_GROUPS) return -EINVAL; - mvdev->group2asid[group] = asid; - return 0; + mvdev->mres.group2asid[group] = asid; + + mutex_lock(&mvdev->mres.lock); + if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mres.mr[asid]) + err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mres.mr[asid]->iotlb, asid); + mutex_unlock(&mvdev->mres.lock); + + return err; } static const struct vdpa_config_ops mlx5_vdpa_ops = { @@ -2854,7 +3667,9 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = { .get_vq_irq = mlx5_get_vq_irq, .get_vq_align = mlx5_vdpa_get_vq_align, .get_vq_group = mlx5_vdpa_get_vq_group, + .get_vq_desc_group = mlx5_vdpa_get_vq_desc_group, /* Op disabled if not supported. */ .get_device_features = mlx5_vdpa_get_device_features, + .get_backend_features = mlx5_vdpa_get_backend_features, .set_driver_features = mlx5_vdpa_set_driver_features, .get_driver_features = mlx5_vdpa_get_driver_features, .set_config_cb = mlx5_vdpa_set_config_cb, @@ -2864,14 +3679,18 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = { .get_status = mlx5_vdpa_get_status, .set_status = mlx5_vdpa_set_status, .reset = mlx5_vdpa_reset, + .compat_reset = mlx5_vdpa_compat_reset, .get_config_size = mlx5_vdpa_get_config_size, .get_config = mlx5_vdpa_get_config, .set_config = mlx5_vdpa_set_config, .get_generation = mlx5_vdpa_get_generation, .set_map = mlx5_vdpa_set_map, + .reset_map = mlx5_vdpa_reset_map, .set_group_asid = mlx5_set_group_asid, + .get_vq_map = mlx5_get_vq_map, .free = mlx5_vdpa_free, .suspend = mlx5_vdpa_suspend, + .resume = mlx5_vdpa_resume, /* Op disabled if not supported. */ }; static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu) @@ -2887,7 +3706,7 @@ static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu) return 0; } -static int alloc_resources(struct mlx5_vdpa_net *ndev) +static int alloc_fixed_resources(struct mlx5_vdpa_net *ndev) { struct mlx5_vdpa_net_resources *res = &ndev->res; int err; @@ -2914,7 +3733,7 @@ err_tis: return err; } -static void free_resources(struct mlx5_vdpa_net *ndev) +static void free_fixed_resources(struct mlx5_vdpa_net *ndev) { struct mlx5_vdpa_net_resources *res = &ndev->res; @@ -2926,7 +3745,7 @@ static void free_resources(struct mlx5_vdpa_net *ndev) res->valid = false; } -static void init_mvqs(struct mlx5_vdpa_net *ndev) +static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev) { struct mlx5_vdpa_virtqueue *mvq; int i; @@ -2938,12 +3757,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev) mvq->ndev = ndev; mvq->fwqp.fw = true; mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; - } - for (; i < ndev->mvdev.max_vqs; i++) { - mvq = &ndev->vqs[i]; - memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); - mvq->index = i; - mvq->ndev = ndev; + mvq->num_ent = MLX5V_DEFAULT_VQ_SIZE; } } @@ -2951,84 +3765,9 @@ struct mlx5_vdpa_mgmtdev { struct vdpa_mgmt_dev mgtdev; struct mlx5_adev *madev; struct mlx5_vdpa_net *ndev; + struct vdpa_config_ops vdpa_ops; }; -static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) -{ - u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {}; - u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {}; - int err; - - MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE); - MLX5_SET(query_vport_state_in, in, op_mod, opmod); - MLX5_SET(query_vport_state_in, in, vport_number, vport); - if (vport) - MLX5_SET(query_vport_state_in, in, other_vport, 1); - - err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out); - if (err) - return 0; - - return MLX5_GET(query_vport_state_out, out, state); -} - -static bool get_link_state(struct mlx5_vdpa_dev *mvdev) -{ - if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) == - VPORT_STATE_UP) - return true; - - return false; -} - -static void update_carrier(struct work_struct *work) -{ - struct mlx5_vdpa_wq_ent *wqent; - struct mlx5_vdpa_dev *mvdev; - struct mlx5_vdpa_net *ndev; - - wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); - mvdev = wqent->mvdev; - ndev = to_mlx5_vdpa_ndev(mvdev); - if (get_link_state(mvdev)) - ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); - else - ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); - - if (ndev->nb_registered && ndev->config_cb.callback) - ndev->config_cb.callback(ndev->config_cb.private); - - kfree(wqent); -} - -static int event_handler(struct notifier_block *nb, unsigned long event, void *param) -{ - struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb); - struct mlx5_eqe *eqe = param; - int ret = NOTIFY_DONE; - struct mlx5_vdpa_wq_ent *wqent; - - if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { - switch (eqe->sub_type) { - case MLX5_PORT_CHANGE_SUBTYPE_DOWN: - case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: - wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); - if (!wqent) - return NOTIFY_DONE; - - wqent->mvdev = &ndev->mvdev; - INIT_WORK(&wqent->work, update_carrier); - queue_work(ndev->mvdev.wq, &wqent->work); - ret = NOTIFY_OK; - break; - default: - return NOTIFY_DONE; - } - return ret; - } - return ret; -} - static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu) { int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); @@ -3051,6 +3790,34 @@ static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu) return err; } +static void allocate_irqs(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_irq_pool_entry *ent; + int i; + + if (!msix_mode_supported(&ndev->mvdev)) + return; + + if (!ndev->mvdev.mdev->pdev) + return; + + ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL); + if (!ndev->irqp.entries) + return; + + + for (i = 0; i < ndev->mvdev.max_vqs; i++) { + ent = ndev->irqp.entries + i; + snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d", + dev_name(&ndev->mvdev.vdev.dev), i); + ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL); + if (!ent->map.virq) + return; + + ndev->irqp.num_ent++; + } +} + static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, const struct vdpa_dev_set_config *add_config) { @@ -3060,6 +3827,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, struct mlx5_vdpa_dev *mvdev; struct mlx5_vdpa_net *ndev; struct mlx5_core_dev *mdev; + u64 device_features; u32 max_vqs; u16 mtu; int err; @@ -3068,6 +3836,26 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, return -ENOSPC; mdev = mgtdev->madev->mdev; + device_features = mgtdev->mgtdev.supported_features; + if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) { + if (add_config->device_features & ~device_features) { + dev_warn(mdev->device, + "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n", + add_config->device_features, device_features); + return -EINVAL; + } + device_features &= add_config->device_features; + } else { + device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF); + } + if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) && + device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) { + dev_warn(mdev->device, + "Must provision minimum features 0x%llx for this device", + BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)); + return -EOPNOTSUPP; + } + if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) { dev_warn(mdev->device, "missing support for split virtqueues\n"); @@ -3091,15 +3879,19 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, max_vqs = 2; } - ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, - MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false); + ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops, + NULL, MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false); if (IS_ERR(ndev)) return PTR_ERR(ndev); - ndev->mvdev.mlx_features = mgtdev->mgtdev.supported_features; ndev->mvdev.max_vqs = max_vqs; mvdev = &ndev->mvdev; mvdev->mdev = mdev; + /* cpu_to_mlx5vdpa16() below depends on this flag */ + mvdev->actual_features = + (device_features & BIT_ULL(VIRTIO_F_VERSION_1)); + + mlx5_cmd_init_async_ctx(mdev, &mvdev->async_ctx); ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL); ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL); @@ -3107,8 +3899,10 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, err = -ENOMEM; goto err_alloc; } + ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT; - init_mvqs(ndev); + mvqs_set_defaults(ndev); + allocate_irqs(ndev); init_rwsem(&ndev->reslock); config = &ndev->config; @@ -3118,20 +3912,26 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, goto err_alloc; } - err = query_mtu(mdev, &mtu); - if (err) - goto err_alloc; + if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) { + err = query_mtu(mdev, &mtu); + if (err) + goto err_alloc; - ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu); + ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu); + } - if (get_link_state(mvdev)) - ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); - else - ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); + if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) { + if (get_link_state(mvdev)) + ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); + else + ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); + } if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) { memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN); - } else { + /* No bother setting mac address in config if not going to provision _F_MAC */ + } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 || + device_features & BIT_ULL(VIRTIO_NET_F_MAC)) { err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac); if (err) goto err_alloc; @@ -3142,56 +3942,80 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, err = mlx5_mpfs_add_mac(pfmdev, config->mac); if (err) goto err_alloc; + } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) { + /* + * We used to clear _F_MAC feature bit if seeing + * zero mac address when device features are not + * specifically provisioned. Keep the behaviour + * so old scripts do not break. + */ + device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC); + } else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) { + /* Don't provision zero mac address for _F_MAC */ + mlx5_vdpa_warn(&ndev->mvdev, + "No mac address provisioned?\n"); + err = -EINVAL; + goto err_alloc; + } - ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC); + if (device_features & BIT_ULL(VIRTIO_NET_F_MQ)) { + config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2); + ndev->rqt_size = max_vqs / 2; + } else { + ndev->rqt_size = 1; } - config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2); - mvdev->vdev.dma_dev = &mdev->pdev->dev; + ndev->mvdev.mlx_features = device_features; + mvdev->vdev.vmap.dma_dev = &mdev->pdev->dev; err = mlx5_vdpa_alloc_resources(&ndev->mvdev); if (err) - goto err_mpfs; + goto err_alloc; + + err = mlx5_vdpa_init_mr_resources(mvdev); + if (err) + goto err_alloc; if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { - err = mlx5_vdpa_create_mr(mvdev, NULL, 0); + err = mlx5_vdpa_create_dma_mr(mvdev); if (err) - goto err_res; + goto err_alloc; } - err = alloc_resources(ndev); + err = alloc_fixed_resources(ndev); if (err) - goto err_mr; + goto err_alloc; ndev->cvq_ent.mvdev = mvdev; INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler); mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq"); if (!mvdev->wq) { err = -ENOMEM; - goto err_res2; + goto err_alloc; } - ndev->nb.notifier_call = event_handler; - mlx5_notifier_register(mdev, &ndev->nb); - ndev->nb_registered = true; mvdev->vdev.mdev = &mgtdev->mgtdev; err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1); if (err) goto err_reg; mgtdev->ndev = ndev; + + /* For virtio-vdpa, the device was set up during device register. */ + if (ndev->setup) + return 0; + + down_write(&ndev->reslock); + err = setup_vq_resources(ndev, false); + up_write(&ndev->reslock); + if (err) + goto err_setup_vq_res; + return 0; +err_setup_vq_res: + _vdpa_unregister_device(&mvdev->vdev); err_reg: destroy_workqueue(mvdev->wq); -err_res2: - free_resources(ndev); -err_mr: - mlx5_vdpa_destroy_mr(mvdev); -err_res: - mlx5_vdpa_free_resources(&ndev->mvdev); -err_mpfs: - if (!is_zero_ether_addr(config->mac)) - mlx5_mpfs_del_mac(pfmdev, config->mac); err_alloc: put_device(&mvdev->vdev.dev); return err; @@ -3204,20 +4028,50 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device * struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); struct workqueue_struct *wq; - if (ndev->nb_registered) { - ndev->nb_registered = false; - mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); - } + unregister_link_notifier(ndev); + _vdpa_unregister_device(dev); + + down_write(&ndev->reslock); + teardown_vq_resources(ndev); + up_write(&ndev->reslock); + wq = mvdev->wq; mvdev->wq = NULL; destroy_workqueue(wq); - _vdpa_unregister_device(dev); mgtdev->ndev = NULL; } +static int mlx5_vdpa_set_attr(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev, + const struct vdpa_dev_set_config *add_config) +{ + struct virtio_net_config *config; + struct mlx5_core_dev *pfmdev; + struct mlx5_vdpa_dev *mvdev; + struct mlx5_vdpa_net *ndev; + struct mlx5_core_dev *mdev; + int err = -EOPNOTSUPP; + + mvdev = to_mvdev(dev); + ndev = to_mlx5_vdpa_ndev(mvdev); + mdev = mvdev->mdev; + config = &ndev->config; + + down_write(&ndev->reslock); + if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) { + pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev)); + err = mlx5_mpfs_add_mac(pfmdev, config->mac); + if (!err) + ether_addr_copy(config->mac, add_config->net.mac); + } + + up_write(&ndev->reslock); + return err; +} + static const struct vdpa_mgmtdev_ops mdev_ops = { .dev_add = mlx5_vdpa_dev_add, .dev_del = mlx5_vdpa_dev_del, + .dev_set_attr = mlx5_vdpa_set_attr, }; static struct virtio_device_id id_table[] = { @@ -3243,11 +4097,19 @@ static int mlx5v_probe(struct auxiliary_device *adev, mgtdev->mgtdev.id_table = id_table; mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) | BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) | - BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU); + BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) | + BIT_ULL(VDPA_ATTR_DEV_FEATURES); mgtdev->mgtdev.max_supported_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1; mgtdev->mgtdev.supported_features = get_supported_features(mdev); mgtdev->madev = madev; + mgtdev->vdpa_ops = mlx5_vdpa_ops; + + if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported)) + mgtdev->vdpa_ops.get_vq_desc_group = NULL; + + if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, freeze_to_rdy_supported)) + mgtdev->vdpa_ops.resume = NULL; err = vdpa_mgmtdev_register(&mgtdev->mgtdev); if (err) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.h b/drivers/vdpa/mlx5/net/mlx5_vnet.h new file mode 100644 index 000000000000..00e79a7d0be8 --- /dev/null +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_VNET_H__ +#define __MLX5_VNET_H__ + +#include "mlx5_vdpa.h" + +#define to_mlx5_vdpa_ndev(__mvdev) \ + container_of(__mvdev, struct mlx5_vdpa_net, mvdev) +#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev) + +struct mlx5_vdpa_net_resources { + u32 tisn; + u32 tdn; + u32 tirn; + u32 rqtn; + bool valid; + struct dentry *tirn_dent; +}; + +#define MLX5V_MACVLAN_SIZE 256 + +static inline u16 key2vid(u64 key) +{ + return (u16)(key >> 48) & 0xfff; +} + +#define MLX5_VDPA_IRQ_NAME_LEN 32 + +struct mlx5_vdpa_irq_pool_entry { + struct msi_map map; + bool used; + char name[MLX5_VDPA_IRQ_NAME_LEN]; + void *dev_id; +}; + +struct mlx5_vdpa_irq_pool { + int num_ent; + struct mlx5_vdpa_irq_pool_entry *entries; +}; + +struct mlx5_vdpa_net { + struct mlx5_vdpa_dev mvdev; + struct mlx5_vdpa_net_resources res; + struct virtio_net_config config; + struct mlx5_vdpa_virtqueue *vqs; + struct vdpa_callback *event_cbs; + + /* Serialize vq resources creation and destruction. This is required + * since memory map might change and we need to destroy and create + * resources while driver in operational. + */ + struct rw_semaphore reslock; + struct mlx5_flow_table *rxft; + struct dentry *rx_dent; + struct dentry *rx_table_dent; + bool setup; + bool needs_teardown; + u32 cur_num_vqs; + u32 rqt_size; + bool nb_registered; + struct notifier_block nb; + struct vdpa_callback config_cb; + struct mlx5_vdpa_wq_ent cvq_ent; + struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE]; + struct mlx5_vdpa_irq_pool irqp; + struct dentry *debugfs; + + u32 umem_1_buffer_param_a; + u32 umem_1_buffer_param_b; + + u32 umem_2_buffer_param_a; + u32 umem_2_buffer_param_b; + + u32 umem_3_buffer_param_a; + u32 umem_3_buffer_param_b; +}; + +struct mlx5_vdpa_counter { + struct mlx5_fc *counter; + struct dentry *dent; + struct mlx5_core_dev *mdev; +}; + +struct macvlan_node { + struct hlist_node hlist; + struct mlx5_flow_handle *ucast_rule; + struct mlx5_flow_handle *mcast_rule; + u64 macvlan; + struct mlx5_vdpa_net *ndev; + bool tagged; +#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) + struct dentry *dent; + struct mlx5_vdpa_counter ucast_counter; + struct mlx5_vdpa_counter mcast_counter; +#endif +}; + +void mlx5_vdpa_add_debugfs(struct mlx5_vdpa_net *ndev); +void mlx5_vdpa_remove_debugfs(struct mlx5_vdpa_net *ndev); +void mlx5_vdpa_add_rx_flow_table(struct mlx5_vdpa_net *ndev); +void mlx5_vdpa_remove_rx_flow_table(struct mlx5_vdpa_net *ndev); +void mlx5_vdpa_add_tirn(struct mlx5_vdpa_net *ndev); +void mlx5_vdpa_remove_tirn(struct mlx5_vdpa_net *ndev); +#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) +void mlx5_vdpa_add_rx_counters(struct mlx5_vdpa_net *ndev, + struct macvlan_node *node); +void mlx5_vdpa_remove_rx_counters(struct mlx5_vdpa_net *ndev, + struct macvlan_node *node); +#else +static inline void mlx5_vdpa_add_rx_counters(struct mlx5_vdpa_net *ndev, + struct macvlan_node *node) {} +static inline void mlx5_vdpa_remove_rx_counters(struct mlx5_vdpa_net *ndev, + struct macvlan_node *node) {} +#endif + + +#endif /* __MLX5_VNET_H__ */ |
