Diffstat (limited to 'drivers/vdpa')
27 files changed, 3051 insertions, 395 deletions
diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index 656c1cb541de..559fb9d3271f 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -11,8 +11,7 @@ if VDPA config VDPA_SIM tristate "vDPA device simulator core" - depends on RUNTIME_TESTING_MENU && HAS_DMA - select DMA_OPS + depends on RUNTIME_TESTING_MENU select VHOST_RING select IOMMU_IOVA help @@ -36,7 +35,12 @@ config VDPA_SIM_BLOCK config VDPA_USER tristate "VDUSE (vDPA Device in Userspace) support" depends on EVENTFD && MMU && HAS_DMA - select DMA_OPS + # + # This driver incorrectly tries to override the dma_ops. It should + # never have done that, but for now keep it working on architectures + # that use dma ops + # + depends on ARCH_HAS_DMA_OPS select VHOST_IOTLB select IOMMU_IOVA help @@ -126,4 +130,15 @@ config PDS_VDPA With this driver, the VirtIO dataplane can be offloaded to an AMD/Pensando DSC device. +config OCTEONEP_VDPA + tristate "vDPA driver for Octeon DPU devices" + depends on m + depends on PCI_MSI + help + This is a vDPA driver designed for Marvell's Octeon DPU devices. + This driver enables the offloading of the VirtIO dataplane to an + Octeon DPU device. + Please note that this driver must be built as a module and it + cannot be loaded until the Octeon emulation software is running. + endif # VDPA diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile index 8f53c6f3cca7..5654d36707af 100644 --- a/drivers/vdpa/Makefile +++ b/drivers/vdpa/Makefile @@ -8,3 +8,4 @@ obj-$(CONFIG_VP_VDPA) += virtio_pci/ obj-$(CONFIG_ALIBABA_ENI_VDPA) += alibaba/ obj-$(CONFIG_SNET_VDPA) += solidrun/ obj-$(CONFIG_PDS_VDPA) += pds/ +obj-$(CONFIG_OCTEONEP_VDPA) += octeon_ep/ diff --git a/drivers/vdpa/alibaba/eni_vdpa.c b/drivers/vdpa/alibaba/eni_vdpa.c index cce3d1837104..ad7f3447fe90 100644 --- a/drivers/vdpa/alibaba/eni_vdpa.c +++ b/drivers/vdpa/alibaba/eni_vdpa.c @@ -254,6 +254,13 @@ static u16 eni_vdpa_get_vq_num_min(struct vdpa_device *vdpa) return vp_legacy_get_queue_size(ldev, 0); } +static u16 eni_vdpa_get_vq_size(struct vdpa_device *vdpa, u16 qid) +{ + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); + + return vp_legacy_get_queue_size(ldev, qid); +} + static int eni_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 qid, struct vdpa_vq_state *state) { @@ -416,6 +423,7 @@ static const struct vdpa_config_ops eni_vdpa_ops = { .reset = eni_vdpa_reset, .get_vq_num_max = eni_vdpa_get_vq_num_max, .get_vq_num_min = eni_vdpa_get_vq_num_min, + .get_vq_size = eni_vdpa_get_vq_size, .get_vq_state = eni_vdpa_get_vq_state, .set_vq_state = eni_vdpa_set_vq_state, .set_vq_cb = eni_vdpa_set_vq_cb, diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c index 060f837a4f9f..d5507b63b6cd 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.c +++ b/drivers/vdpa/ifcvf/ifcvf_base.c @@ -69,20 +69,19 @@ static int ifcvf_read_config_range(struct pci_dev *dev, return 0; } -static u16 ifcvf_get_vq_size(struct ifcvf_hw *hw, u16 qid) +u16 ifcvf_get_vq_size(struct ifcvf_hw *hw, u16 qid) { u16 queue_size; + if (qid >= hw->nr_vring) + return 0; + vp_iowrite16(qid, &hw->common_cfg->queue_select); queue_size = vp_ioread16(&hw->common_cfg->queue_size); return queue_size; } -/* This function returns the max allowed safe size for - * all virtqueues. It is the minimal size that can be - * suppprted by all virtqueues. 
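For illustration, the per-queue size callback that eni_vdpa gains above (and that ifcvf now exports from ifcvf_base) follows a common shape: select the queue in the transport's common config window, then read back its size. A minimal sketch for a hypothetical driver follows; "my_hw", "my_vdpa_to_hw()" and the nr_vring field are stand-in names, not part of this patch.

static u16 my_vdpa_get_vq_size(struct vdpa_device *vdpa, u16 qid)
{
	struct my_hw *hw = my_vdpa_to_hw(vdpa);	/* hypothetical container_of() wrapper */

	/* Reject out-of-range queues, mirroring the check added to ifcvf_get_vq_size(). */
	if (qid >= hw->nr_vring)
		return 0;

	vp_iowrite16(qid, &hw->common_cfg->queue_select);
	return vp_ioread16(&hw->common_cfg->queue_size);
}

The callback is then wired into the driver's const struct vdpa_config_ops as ".get_vq_size = my_vdpa_get_vq_size," next to the existing .get_vq_num_max/.get_vq_num_min entries, exactly as the eni and ifcvf hunks do.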
- */ u16 ifcvf_get_max_vq_size(struct ifcvf_hw *hw) { u16 queue_size, max_size, qid; @@ -94,7 +93,7 @@ u16 ifcvf_get_max_vq_size(struct ifcvf_hw *hw) if (!queue_size) continue; - max_size = min(queue_size, max_size); + max_size = max(queue_size, max_size); } return max_size; @@ -109,7 +108,7 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev) u32 i; ret = pci_read_config_byte(pdev, PCI_CAPABILITY_LIST, &pos); - if (ret < 0) { + if (ret) { IFCVF_ERR(pdev, "Failed to read PCI capability list\n"); return -EIO; } diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h index b57849c643f6..aa36de361c10 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.h +++ b/drivers/vdpa/ifcvf/ifcvf_base.h @@ -28,6 +28,7 @@ #define IFCVF_PCI_MAX_RESOURCE 6 #define IFCVF_LM_BAR 4 +#define IFCVF_MIN_VQ_SIZE 64 #define IFCVF_ERR(pdev, fmt, ...) dev_err(&pdev->dev, fmt, ##__VA_ARGS__) #define IFCVF_DBG(pdev, fmt, ...) dev_dbg(&pdev->dev, fmt, ##__VA_ARGS__) @@ -111,15 +112,12 @@ void ifcvf_write_dev_config(struct ifcvf_hw *hw, u64 offset, const void *src, int length); u8 ifcvf_get_status(struct ifcvf_hw *hw); void ifcvf_set_status(struct ifcvf_hw *hw, u8 status); -void io_write64_twopart(u64 val, u32 *lo, u32 *hi); void ifcvf_reset(struct ifcvf_hw *hw); u64 ifcvf_get_dev_features(struct ifcvf_hw *hw); u64 ifcvf_get_hw_features(struct ifcvf_hw *hw); int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features); u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid); int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num); -struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw); -int ifcvf_probed_virtio_net(struct ifcvf_hw *hw); u32 ifcvf_get_config_size(struct ifcvf_hw *hw); u16 ifcvf_set_vq_vector(struct ifcvf_hw *hw, u16 qid, int vector); u16 ifcvf_set_config_vector(struct ifcvf_hw *hw, int vector); @@ -131,4 +129,5 @@ void ifcvf_set_vq_ready(struct ifcvf_hw *hw, u16 qid, bool ready); void ifcvf_set_driver_features(struct ifcvf_hw *hw, u64 features); u64 ifcvf_get_driver_features(struct ifcvf_hw *hw); u16 ifcvf_get_max_vq_size(struct ifcvf_hw *hw); +u16 ifcvf_get_vq_size(struct ifcvf_hw *hw, u16 qid); #endif /* _IFCVF_H_ */ diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index e98fa8100f3c..ccf64d7bbfaa 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -456,6 +456,11 @@ static u16 ifcvf_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev) return ifcvf_get_max_vq_size(vf); } +static u16 ifcvf_vdpa_get_vq_num_min(struct vdpa_device *vdpa_dev) +{ + return IFCVF_MIN_VQ_SIZE; +} + static int ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid, struct vdpa_vq_state *state) { @@ -597,6 +602,14 @@ static int ifcvf_vdpa_get_vq_irq(struct vdpa_device *vdpa_dev, return -EINVAL; } +static u16 ifcvf_vdpa_get_vq_size(struct vdpa_device *vdpa_dev, + u16 qid) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + return ifcvf_get_vq_size(vf, qid); +} + static struct vdpa_notification_area ifcvf_get_vq_notification(struct vdpa_device *vdpa_dev, u16 idx) { @@ -624,6 +637,7 @@ static const struct vdpa_config_ops ifc_vdpa_ops = { .set_status = ifcvf_vdpa_set_status, .reset = ifcvf_vdpa_reset, .get_vq_num_max = ifcvf_vdpa_get_vq_num_max, + .get_vq_num_min = ifcvf_vdpa_get_vq_num_min, .get_vq_state = ifcvf_vdpa_get_vq_state, .set_vq_state = ifcvf_vdpa_set_vq_state, .set_vq_cb = ifcvf_vdpa_set_vq_cb, @@ -632,6 +646,7 @@ static const struct vdpa_config_ops ifc_vdpa_ops = { .set_vq_num = ifcvf_vdpa_set_vq_num, .set_vq_address = 
ifcvf_vdpa_set_vq_address, .get_vq_irq = ifcvf_vdpa_get_vq_irq, + .get_vq_size = ifcvf_vdpa_get_vq_size, .kick_vq = ifcvf_vdpa_kick_vq, .get_generation = ifcvf_vdpa_get_generation, .get_device_id = ifcvf_vdpa_get_device_id, @@ -879,4 +894,5 @@ static struct pci_driver ifcvf_driver = { module_pci_driver(ifcvf_driver); +MODULE_DESCRIPTION("Intel IFC VF NIC driver for virtio dataplane offloading"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index 50aac8fe57ef..2cedf7e2dbc4 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -83,10 +83,28 @@ enum { MLX5_VDPA_NUM_AS = 2 }; +struct mlx5_vdpa_mr_resources { + struct mlx5_vdpa_mr *mr[MLX5_VDPA_NUM_AS]; + unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS]; + + /* Pre-deletion mr list */ + struct list_head mr_list_head; + + /* Deferred mr list */ + struct list_head mr_gc_list_head; + struct workqueue_struct *wq_gc; + struct delayed_work gc_dwork_ent; + + struct mutex lock; + + atomic_t shutdown; +}; + struct mlx5_vdpa_dev { struct vdpa_device vdev; struct mlx5_core_dev *mdev; struct mlx5_vdpa_resources res; + struct mlx5_vdpa_mr_resources mres; u64 mlx_features; u64 actual_features; @@ -95,14 +113,23 @@ struct mlx5_vdpa_dev { u16 max_idx; u32 generation; - struct mlx5_vdpa_mr *mr[MLX5_VDPA_NUM_AS]; - struct list_head mr_list_head; - /* serialize mr access */ - struct mutex mr_mtx; struct mlx5_control_vq cvq; struct workqueue_struct *wq; - unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS]; bool suspended; + + struct mlx5_async_ctx async_ctx; +}; + +struct mlx5_vdpa_async_cmd { + int err; + struct mlx5_async_work cb_work; + struct completion cmd_done; + + void *in; + size_t inlen; + + void *out; + size_t outlen; }; int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn); @@ -121,7 +148,9 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in, int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey); struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb); +int mlx5_vdpa_init_mr_resources(struct mlx5_vdpa_dev *mvdev); void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev); +void mlx5_vdpa_clean_mrs(struct mlx5_vdpa_dev *mvdev); void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr); void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev, @@ -134,6 +163,14 @@ int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev, unsigned int asid); int mlx5_vdpa_create_dma_mr(struct mlx5_vdpa_dev *mvdev); int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid); +int mlx5_vdpa_exec_async_cmds(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_async_cmd *cmds, + int num_cmds); + +#define mlx5_vdpa_err(__dev, format, ...) \ + dev_err((__dev)->mdev->device, "%s:%d:(pid %d) error: " format, __func__, __LINE__, \ + current->pid, ##__VA_ARGS__) + #define mlx5_vdpa_warn(__dev, format, ...) 
\ dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__, \ diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 4758914ccf86..8455f08f5d40 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -49,17 +49,23 @@ static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt) } } -static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) -{ - int inlen; +struct mlx5_create_mkey_mem { + u8 out[MLX5_ST_SZ_BYTES(create_mkey_out)]; + u8 in[MLX5_ST_SZ_BYTES(create_mkey_in)]; + __be64 mtt[]; +}; + +struct mlx5_destroy_mkey_mem { + u8 out[MLX5_ST_SZ_BYTES(destroy_mkey_out)]; + u8 in[MLX5_ST_SZ_BYTES(destroy_mkey_in)]; +}; + +static void fill_create_direct_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_direct_mr *mr, + struct mlx5_create_mkey_mem *mem) +{ + void *in = &mem->in; void *mkc; - void *in; - int err; - - inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); @@ -76,18 +82,36 @@ static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct MLX5_SET(create_mkey_in, in, translations_octword_actual_size, get_octo_len(mr->end - mr->start, mr->log_size)); populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt)); - err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen); - kvfree(in); - if (err) { - mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n"); - return err; - } - return 0; + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); + MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); +} + +static void create_direct_mr_end(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_direct_mr *mr, + struct mlx5_create_mkey_mem *mem) +{ + u32 mkey_index = MLX5_GET(create_mkey_out, mem->out, mkey_index); + + mr->mr = mlx5_idx_to_mkey(mkey_index); +} + +static void fill_destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_direct_mr *mr, + struct mlx5_destroy_mkey_mem *mem) +{ + void *in = &mem->in; + + MLX5_SET(destroy_mkey_in, in, uid, mvdev->res.uid); + MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mr->mr)); } static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) { + if (!mr->mr) + return; + mlx5_vdpa_destroy_mkey(mvdev, mr->mr); } @@ -179,6 +203,123 @@ static int klm_byte_size(int nklms) return 16 * ALIGN(nklms, 4); } +#define MLX5_VDPA_MTT_ALIGN 16 + +static int create_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) +{ + struct mlx5_vdpa_async_cmd *cmds; + struct mlx5_vdpa_direct_mr *dmr; + int err = 0; + int i = 0; + + cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL); + if (!cmds) + return -ENOMEM; + + list_for_each_entry(dmr, &mr->head, list) { + struct mlx5_create_mkey_mem *cmd_mem; + int mttlen, mttcount; + + mttlen = roundup(MLX5_ST_SZ_BYTES(mtt) * dmr->nsg, MLX5_VDPA_MTT_ALIGN); + mttcount = mttlen / sizeof(cmd_mem->mtt[0]); + cmd_mem = kvcalloc(1, struct_size(cmd_mem, mtt, mttcount), GFP_KERNEL); + if (!cmd_mem) { + err = -ENOMEM; + goto done; + } + + cmds[i].out = cmd_mem->out; + cmds[i].outlen = sizeof(cmd_mem->out); + cmds[i].in = cmd_mem->in; + cmds[i].inlen = struct_size(cmd_mem, mtt, mttcount); + + fill_create_direct_mr(mvdev, dmr, cmd_mem); + + i++; + } + + err = 
mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs); + if (err) { + + mlx5_vdpa_err(mvdev, "error issuing MTT mkey creation for direct mrs: %d\n", err); + goto done; + } + + i = 0; + list_for_each_entry(dmr, &mr->head, list) { + struct mlx5_vdpa_async_cmd *cmd = &cmds[i++]; + struct mlx5_create_mkey_mem *cmd_mem; + + cmd_mem = container_of(cmd->out, struct mlx5_create_mkey_mem, out); + + if (!cmd->err) { + create_direct_mr_end(mvdev, dmr, cmd_mem); + } else { + err = err ? err : cmd->err; + mlx5_vdpa_err(mvdev, "error creating MTT mkey [0x%llx, 0x%llx]: %d\n", + dmr->start, dmr->end, cmd->err); + } + } + +done: + for (i = i-1; i >= 0; i--) { + struct mlx5_create_mkey_mem *cmd_mem; + + cmd_mem = container_of(cmds[i].out, struct mlx5_create_mkey_mem, out); + kvfree(cmd_mem); + } + + kvfree(cmds); + return err; +} + +DEFINE_FREE(free_cmds, struct mlx5_vdpa_async_cmd *, kvfree(_T)) +DEFINE_FREE(free_cmd_mem, struct mlx5_destroy_mkey_mem *, kvfree(_T)) + +static int destroy_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) +{ + struct mlx5_destroy_mkey_mem *cmd_mem __free(free_cmd_mem) = NULL; + struct mlx5_vdpa_async_cmd *cmds __free(free_cmds) = NULL; + struct mlx5_vdpa_direct_mr *dmr; + int err = 0; + int i = 0; + + cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL); + cmd_mem = kvcalloc(mr->num_directs, sizeof(*cmd_mem), GFP_KERNEL); + if (!cmds || !cmd_mem) + return -ENOMEM; + + list_for_each_entry(dmr, &mr->head, list) { + cmds[i].out = cmd_mem[i].out; + cmds[i].outlen = sizeof(cmd_mem[i].out); + cmds[i].in = cmd_mem[i].in; + cmds[i].inlen = sizeof(cmd_mem[i].in); + fill_destroy_direct_mr(mvdev, dmr, &cmd_mem[i]); + i++; + } + + err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs); + if (err) { + + mlx5_vdpa_err(mvdev, "error issuing MTT mkey deletion for direct mrs: %d\n", err); + return err; + } + + i = 0; + list_for_each_entry(dmr, &mr->head, list) { + struct mlx5_vdpa_async_cmd *cmd = &cmds[i++]; + + dmr->mr = 0; + if (cmd->err) { + err = err ? err : cmd->err; + mlx5_vdpa_err(mvdev, "error deleting MTT mkey [0x%llx, 0x%llx]: %d\n", + dmr->start, dmr->end, cmd->err); + } + } + + return err; +} + static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) { int inlen; @@ -227,21 +368,19 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr unsigned long lgcd = 0; int log_entity_size; unsigned long size; - u64 start = 0; int err; struct page *pg; unsigned int nsg; int sglen; - u64 pa; + u64 pa, offset; u64 paend; struct scatterlist *sg; struct device *dma = mvdev->vdev.dma_dev; for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1); - map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) { + map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) { size = maplen(map, mr); lgcd = gcd(lgcd, size); - start += size; } log_entity_size = ilog2(lgcd); @@ -255,8 +394,10 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr sg = mr->sg_head.sgl; for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1); map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) { - paend = map->addr + maplen(map, mr); - for (pa = map->addr; pa < paend; pa += sglen) { + offset = mr->start > map->start ? mr->start - map->start : 0; + pa = map->addr + offset; + paend = map->addr + offset + maplen(map, mr); + for (; pa < paend; pa += sglen) { pg = pfn_to_page(__phys_to_pfn(pa)); if (!sg) { mlx5_vdpa_warn(mvdev, "sg null. 
start 0x%llx, end 0x%llx\n", @@ -279,14 +420,8 @@ done: goto err_map; } - err = create_direct_mr(mvdev, mr); - if (err) - goto err_direct; - return 0; -err_direct: - dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); err_map: sg_free_table(&mr->sg_head); return err; @@ -401,6 +536,10 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev, if (err) goto err_chain; + err = create_direct_keys(mvdev, mr); + if (err) + goto err_chain; + /* Create the memory key that defines the guests's address space. This * memory key refers to the direct keys that contain the MTT * translations @@ -489,6 +628,7 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr struct mlx5_vdpa_direct_mr *n; destroy_indirect_key(mvdev, mr); + destroy_direct_keys(mvdev, mr); list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) { list_del_init(&dmr->list); unmap_direct_mr(mvdev, dmr); @@ -513,22 +653,58 @@ static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_ kfree(mr); } +/* There can be multiple .set_map() operations in quick succession. + * This large delay is a simple way to prevent the MR cleanup from blocking + * .set_map() MR creation in this scenario. + */ +#define MLX5_VDPA_MR_GC_TRIGGER_MS 2000 + +static void mlx5_vdpa_mr_gc_handler(struct work_struct *work) +{ + struct mlx5_vdpa_mr_resources *mres; + struct mlx5_vdpa_mr *mr, *tmp; + struct mlx5_vdpa_dev *mvdev; + + mres = container_of(work, struct mlx5_vdpa_mr_resources, gc_dwork_ent.work); + + if (atomic_read(&mres->shutdown)) { + mutex_lock(&mres->lock); + } else if (!mutex_trylock(&mres->lock)) { + queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent, + msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS)); + return; + } + + mvdev = container_of(mres, struct mlx5_vdpa_dev, mres); + + list_for_each_entry_safe(mr, tmp, &mres->mr_gc_list_head, mr_list) { + _mlx5_vdpa_destroy_mr(mvdev, mr); + } + + mutex_unlock(&mres->lock); +} + static void _mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) { + struct mlx5_vdpa_mr_resources *mres = &mvdev->mres; + if (!mr) return; - if (refcount_dec_and_test(&mr->refcount)) - _mlx5_vdpa_destroy_mr(mvdev, mr); + if (refcount_dec_and_test(&mr->refcount)) { + list_move_tail(&mr->mr_list, &mres->mr_gc_list_head); + queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent, + msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS)); + } } void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) { - mutex_lock(&mvdev->mr_mtx); + mutex_lock(&mvdev->mres.lock); _mlx5_vdpa_put_mr(mvdev, mr); - mutex_unlock(&mvdev->mr_mtx); + mutex_unlock(&mvdev->mres.lock); } static void _mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, @@ -543,44 +719,47 @@ static void _mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) { - mutex_lock(&mvdev->mr_mtx); + mutex_lock(&mvdev->mres.lock); _mlx5_vdpa_get_mr(mvdev, mr); - mutex_unlock(&mvdev->mr_mtx); + mutex_unlock(&mvdev->mres.lock); } void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *new_mr, unsigned int asid) { - struct mlx5_vdpa_mr *old_mr = mvdev->mr[asid]; + struct mlx5_vdpa_mr *old_mr = mvdev->mres.mr[asid]; - mutex_lock(&mvdev->mr_mtx); + mutex_lock(&mvdev->mres.lock); _mlx5_vdpa_put_mr(mvdev, old_mr); - mvdev->mr[asid] = new_mr; + mvdev->mres.mr[asid] = new_mr; - mutex_unlock(&mvdev->mr_mtx); + mutex_unlock(&mvdev->mres.lock); } static void mlx5_vdpa_show_mr_leaks(struct mlx5_vdpa_dev *mvdev) { struct 
mlx5_vdpa_mr *mr; - mutex_lock(&mvdev->mr_mtx); + mutex_lock(&mvdev->mres.lock); - list_for_each_entry(mr, &mvdev->mr_list_head, mr_list) { + list_for_each_entry(mr, &mvdev->mres.mr_list_head, mr_list) { mlx5_vdpa_warn(mvdev, "mkey still alive after resource delete: " "mr: %p, mkey: 0x%x, refcount: %u\n", mr, mr->mkey, refcount_read(&mr->refcount)); } - mutex_unlock(&mvdev->mr_mtx); + mutex_unlock(&mvdev->mres.lock); } -void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev) +void mlx5_vdpa_clean_mrs(struct mlx5_vdpa_dev *mvdev) { + if (!mvdev->res.valid) + return; + for (int i = 0; i < MLX5_VDPA_NUM_AS; i++) mlx5_vdpa_update_mr(mvdev, NULL, i); @@ -613,7 +792,7 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, if (err) goto err_iotlb; - list_add_tail(&mr->mr_list, &mvdev->mr_list_head); + list_add_tail(&mr->mr_list, &mvdev->mres.mr_list_head); return 0; @@ -639,9 +818,9 @@ struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, if (!mr) return ERR_PTR(-ENOMEM); - mutex_lock(&mvdev->mr_mtx); + mutex_lock(&mvdev->mres.lock); err = _mlx5_vdpa_create_mr(mvdev, mr, iotlb); - mutex_unlock(&mvdev->mr_mtx); + mutex_unlock(&mvdev->mres.lock); if (err) goto out_err; @@ -661,7 +840,7 @@ int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev, { int err; - if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid) + if (mvdev->mres.group2asid[MLX5_VDPA_CVQ_GROUP] != asid) return 0; spin_lock(&mvdev->cvq.iommu_lock); @@ -703,3 +882,33 @@ int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) return 0; } + +int mlx5_vdpa_init_mr_resources(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_vdpa_mr_resources *mres = &mvdev->mres; + + mres->wq_gc = create_singlethread_workqueue("mlx5_vdpa_mr_gc"); + if (!mres->wq_gc) + return -ENOMEM; + + INIT_DELAYED_WORK(&mres->gc_dwork_ent, mlx5_vdpa_mr_gc_handler); + + mutex_init(&mres->lock); + + INIT_LIST_HEAD(&mres->mr_list_head); + INIT_LIST_HEAD(&mres->mr_gc_list_head); + + return 0; +} + +void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_vdpa_mr_resources *mres = &mvdev->mres; + + atomic_set(&mres->shutdown, 1); + + flush_delayed_work(&mres->gc_dwork_ent); + destroy_workqueue(mres->wq_gc); + mres->wq_gc = NULL; + mutex_destroy(&mres->lock); +} diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c index 5c5a41b64bfc..aeae31d0cefa 100644 --- a/drivers/vdpa/mlx5/core/resources.c +++ b/drivers/vdpa/mlx5/core/resources.c @@ -256,7 +256,6 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev) mlx5_vdpa_warn(mvdev, "resources already allocated\n"); return -EINVAL; } - mutex_init(&mvdev->mr_mtx); res->uar = mlx5_get_uars_page(mdev); if (IS_ERR(res->uar)) { err = PTR_ERR(res->uar); @@ -301,7 +300,6 @@ err_pd: err_uctx: mlx5_put_uars_page(mdev, res->uar); err_uars: - mutex_destroy(&mvdev->mr_mtx); return err; } @@ -318,6 +316,78 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev) dealloc_pd(mvdev, res->pdn, res->uid); destroy_uctx(mvdev, res->uid); mlx5_put_uars_page(mvdev->mdev, res->uar); - mutex_destroy(&mvdev->mr_mtx); res->valid = false; } + +static void virtqueue_cmd_callback(int status, struct mlx5_async_work *context) +{ + struct mlx5_vdpa_async_cmd *cmd = + container_of(context, struct mlx5_vdpa_async_cmd, cb_work); + + cmd->err = mlx5_cmd_check(context->ctx->dev, status, cmd->in, cmd->out); + complete(&cmd->cmd_done); +} + +static int issue_async_cmd(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_async_cmd *cmds, + int issued, + int 
*completed) + +{ + struct mlx5_vdpa_async_cmd *cmd = &cmds[issued]; + int err; + +retry: + err = mlx5_cmd_exec_cb(&mvdev->async_ctx, + cmd->in, cmd->inlen, + cmd->out, cmd->outlen, + virtqueue_cmd_callback, + &cmd->cb_work); + if (err == -EBUSY) { + if (*completed < issued) { + /* Throttled by own commands: wait for oldest completion. */ + wait_for_completion(&cmds[*completed].cmd_done); + (*completed)++; + + goto retry; + } else { + /* Throttled by external commands: switch to sync api. */ + err = mlx5_cmd_exec(mvdev->mdev, + cmd->in, cmd->inlen, + cmd->out, cmd->outlen); + if (!err) + (*completed)++; + } + } + + return err; +} + +int mlx5_vdpa_exec_async_cmds(struct mlx5_vdpa_dev *mvdev, + struct mlx5_vdpa_async_cmd *cmds, + int num_cmds) +{ + int completed = 0; + int issued = 0; + int err = 0; + + for (int i = 0; i < num_cmds; i++) + init_completion(&cmds[i].cmd_done); + + while (issued < num_cmds) { + + err = issue_async_cmd(mvdev, cmds, issued, &completed); + if (err) { + mlx5_vdpa_err(mvdev, "error issuing command %d of %d: %d\n", + issued, num_cmds, err); + break; + } + + issued++; + } + + while (completed < issued) + wait_for_completion(&cmds[completed++].cmd_done); + + return err; +} diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 778821bab7d9..36099047560d 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -48,6 +48,18 @@ MODULE_LICENSE("Dual BSD/GPL"); #define MLX5V_UNTAGGED 0x1000 +/* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section + * 5.1.6.5.5 "Device operation in multiqueue mode": + * + * Multiqueue is disabled by default. + * The driver enables multiqueue by sending a command using class + * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue + * operation, as follows: ... + */ +#define MLX5V_DEFAULT_VQ_COUNT 2 + +#define MLX5V_DEFAULT_VQ_SIZE 256 + struct mlx5_vdpa_cq_buf { struct mlx5_frag_buf_ctrl fbc; struct mlx5_frag_buf frag_buf; @@ -144,15 +156,14 @@ static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx) return idx <= mvdev->max_idx; } -static void free_resources(struct mlx5_vdpa_net *ndev); -static void init_mvqs(struct mlx5_vdpa_net *ndev); -static int setup_driver(struct mlx5_vdpa_dev *mvdev); -static void teardown_driver(struct mlx5_vdpa_net *ndev); +static void free_fixed_resources(struct mlx5_vdpa_net *ndev); +static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev); +static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled); +static void teardown_vq_resources(struct mlx5_vdpa_net *ndev); +static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq); static bool mlx5_vdpa_debug; -#define MLX5_CVQ_MAX_ENT 16 - #define MLX5_LOG_VIO_FLAG(_feature) \ do { \ if (features & BIT_ULL(_feature)) \ @@ -864,13 +875,16 @@ static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev) pci_msix_can_alloc_dyn(mvdev->mdev->pdev); } -static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +static int create_virtqueue(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq, + bool filled) { int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in); u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {}; struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; struct mlx5_vdpa_mr *vq_mr; struct mlx5_vdpa_mr *vq_desc_mr; + u64 features = filled ? 
mvdev->actual_features : mvdev->mlx_features; void *obj_context; u16 mlx_features; void *cmd_hdr; @@ -888,7 +902,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque goto err_alloc; } - mlx_features = get_features(ndev->mvdev.actual_features); + mlx_features = get_features(features); cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); @@ -896,8 +910,6 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context); - MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); - MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3, mlx_features >> 3); MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0, @@ -919,17 +931,36 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index); MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent); MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, - !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1))); - MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); - MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); - MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); - vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]]; - if (vq_mr) - MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey); - - vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; - if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) - MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey); + !!(features & BIT_ULL(VIRTIO_F_VERSION_1))); + + if (filled) { + MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); + MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); + + MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); + MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); + MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); + + vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]]; + if (vq_mr) + MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey); + + vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; + if (vq_desc_mr && + MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) + MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey); + } else { + /* If there is no mr update, make sure that the existing ones are set + * modify to ready. 
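The "filled" flag threaded through create_virtqueue() here enables a two-stage bring-up: a virtqueue object can be created early against device defaults, with its addresses, hardware indices and memory keys supplied later through modify commands tracked in mvq->modified_fields (the branch continuing below records exactly those deferred mkey updates). A condensed sketch of the pattern, with hypothetical MY_MODIFY_* bits and helpers standing in for the MLX5_VIRTQ_MODIFY_MASK_* values used by this driver:

/* Sketch only: create the queue object now; if it was created blank,
 * record which volatile fields a later modify command must fill in.
 */
static int my_setup_vq(struct my_net *ndev, struct my_vq *mvq, bool filled)
{
	int err;

	err = my_create_vq_object(ndev, mvq, filled);	/* hypothetical */
	if (err)
		return err;

	if (!filled)
		mvq->modified_fields |= MY_MODIFY_ADDRS |
					MY_MODIFY_AVAIL_IDX |
					MY_MODIFY_USED_IDX |
					MY_MODIFY_MKEY;

	return 0;
}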
+ */ + vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]]; + if (vq_mr) + mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY; + + vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; + if (vq_desc_mr) + mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY; + } MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id); MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size); @@ -949,12 +980,15 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque kfree(in); mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); - mlx5_vdpa_get_mr(mvdev, vq_mr); - mvq->vq_mr = vq_mr; + if (filled) { + mlx5_vdpa_get_mr(mvdev, vq_mr); + mvq->vq_mr = vq_mr; - if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) { - mlx5_vdpa_get_mr(mvdev, vq_desc_mr); - mvq->desc_mr = vq_desc_mr; + if (vq_desc_mr && + MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) { + mlx5_vdpa_get_mr(mvdev, vq_desc_mr); + mvq->desc_mr = vq_desc_mr; + } } return 0; @@ -1150,40 +1184,92 @@ struct mlx5_virtq_attr { u16 used_index; }; -static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, - struct mlx5_virtq_attr *attr) -{ - int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out); - u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {}; - void *out; - void *obj_context; - void *cmd_hdr; - int err; +struct mlx5_virtqueue_query_mem { + u8 in[MLX5_ST_SZ_BYTES(query_virtio_net_q_in)]; + u8 out[MLX5_ST_SZ_BYTES(query_virtio_net_q_out)]; +}; - out = kzalloc(outlen, GFP_KERNEL); - if (!out) - return -ENOMEM; +struct mlx5_virtqueue_modify_mem { + u8 in[MLX5_ST_SZ_BYTES(modify_virtio_net_q_in)]; + u8 out[MLX5_ST_SZ_BYTES(modify_virtio_net_q_out)]; +}; - cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr); +static void fill_query_virtqueue_cmd(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq, + struct mlx5_virtqueue_query_mem *cmd) +{ + void *cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); - err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen); - if (err) - goto err_cmd; +} + +static void query_virtqueue_end(struct mlx5_vdpa_net *ndev, + struct mlx5_virtqueue_query_mem *cmd, + struct mlx5_virtq_attr *attr) +{ + void *obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, cmd->out, obj_context); - obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context); memset(attr, 0, sizeof(*attr)); attr->state = MLX5_GET(virtio_net_q_object, obj_context, state); attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index); attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index); - kfree(out); - return 0; +} -err_cmd: - kfree(out); +static int query_virtqueues(struct mlx5_vdpa_net *ndev, + int start_vq, + int num_vqs, + struct mlx5_virtq_attr *attrs) +{ + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; + struct mlx5_virtqueue_query_mem *cmd_mem; + struct mlx5_vdpa_async_cmd *cmds; + int err = 0; + + WARN(start_vq + num_vqs > mvdev->max_vqs, "query vq range invalid [%d, %d), max_vqs: %u\n", + start_vq, start_vq + num_vqs, mvdev->max_vqs); + + cmds = 
kvcalloc(num_vqs, sizeof(*cmds), GFP_KERNEL); + cmd_mem = kvcalloc(num_vqs, sizeof(*cmd_mem), GFP_KERNEL); + if (!cmds || !cmd_mem) { + err = -ENOMEM; + goto done; + } + + for (int i = 0; i < num_vqs; i++) { + cmds[i].in = &cmd_mem[i].in; + cmds[i].inlen = sizeof(cmd_mem[i].in); + cmds[i].out = &cmd_mem[i].out; + cmds[i].outlen = sizeof(cmd_mem[i].out); + fill_query_virtqueue_cmd(ndev, &ndev->vqs[start_vq + i], &cmd_mem[i]); + } + + err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs); + if (err) { + mlx5_vdpa_err(mvdev, "error issuing query cmd for vq range [%d, %d): %d\n", + start_vq, start_vq + num_vqs, err); + goto done; + } + + for (int i = 0; i < num_vqs; i++) { + struct mlx5_vdpa_async_cmd *cmd = &cmds[i]; + int vq_idx = start_vq + i; + + if (cmd->err) { + mlx5_vdpa_err(mvdev, "query vq %d failed, err: %d\n", vq_idx, err); + if (!err) + err = cmd->err; + continue; + } + + query_virtqueue_end(ndev, &cmd_mem[i], &attrs[i]); + } + +done: + kvfree(cmd_mem); + kvfree(cmds); return err; } @@ -1217,51 +1303,30 @@ static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq) return true; } -static int modify_virtqueue(struct mlx5_vdpa_net *ndev, - struct mlx5_vdpa_virtqueue *mvq, - int state) +static void fill_modify_virtqueue_cmd(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq, + int state, + struct mlx5_virtqueue_modify_mem *cmd) { - int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in); - u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {}; struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; struct mlx5_vdpa_mr *desc_mr = NULL; struct mlx5_vdpa_mr *vq_mr = NULL; - bool state_change = false; void *obj_context; void *cmd_hdr; void *vq_ctx; - void *in; - int err; - - if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE) - return 0; - - if (!modifiable_virtqueue_fields(mvq)) - return -EINVAL; - - in = kzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; - cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr); + cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); - obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context); + obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, obj_context); vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); - if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) { - if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) { - err = -EINVAL; - goto done; - } - + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) MLX5_SET(virtio_net_q_object, obj_context, state, state); - state_change = true; - } if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) { MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); @@ -1275,8 +1340,21 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX) MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION) + MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, + !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1))); + + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES) { + u16 mlx_features = 
get_features(ndev->mvdev.actual_features); + + MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3, + mlx_features >> 3); + MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0, + mlx_features & 7); + } + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) { - vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]]; + vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]]; if (vq_mr) MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey); @@ -1285,7 +1363,7 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, } if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) { - desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; + desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey); @@ -1294,38 +1372,36 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, } MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields); - err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); - if (err) - goto done; +} - if (state_change) - mvq->fw_state = state; +static void modify_virtqueue_end(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq, + int state) +{ + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) { + unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]; + struct mlx5_vdpa_mr *vq_mr = mvdev->mres.mr[asid]; + mlx5_vdpa_put_mr(mvdev, mvq->vq_mr); mlx5_vdpa_get_mr(mvdev, vq_mr); mvq->vq_mr = vq_mr; } if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) { + unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]; + struct mlx5_vdpa_mr *desc_mr = mvdev->mres.mr[asid]; + mlx5_vdpa_put_mr(mvdev, mvq->desc_mr); mlx5_vdpa_get_mr(mvdev, desc_mr); mvq->desc_mr = desc_mr; } - mvq->modified_fields = 0; - -done: - kfree(in); - return err; -} + if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) + mvq->fw_state = state; -static int modify_virtqueue_state(struct mlx5_vdpa_net *ndev, - struct mlx5_vdpa_virtqueue *mvq, - unsigned int state) -{ - mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE; - return modify_virtqueue(ndev, mvq, state); + mvq->modified_fields = 0; } static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) @@ -1419,14 +1495,13 @@ static void dealloc_vector(struct mlx5_vdpa_net *ndev, } } -static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +static int setup_vq(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq, + bool filled) { u16 idx = mvq->index; int err; - if (!mvq->num_ent) - return 0; - if (mvq->initialized) return 0; @@ -1451,20 +1526,18 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) goto err_connect; alloc_vector(ndev, mvq); - err = create_virtqueue(ndev, mvq); + err = create_virtqueue(ndev, mvq, filled); if (err) goto err_vq; + mvq->initialized = true; + if (mvq->ready) { - err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); - if (err) { - mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n", - idx, err); + err = resume_vq(ndev, mvq); + if (err) goto err_modify; - } } - mvq->initialized = true; return 0; err_modify: @@ -1481,51 +1554,171 @@ err_fwqp: return err; } -static void suspend_vq(struct 
mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +static int modify_virtqueues(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs, int state) { - struct mlx5_virtq_attr attr; + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; + struct mlx5_virtqueue_modify_mem *cmd_mem; + struct mlx5_vdpa_async_cmd *cmds; + int err = 0; + + WARN(start_vq + num_vqs > mvdev->max_vqs, "modify vq range invalid [%d, %d), max_vqs: %u\n", + start_vq, start_vq + num_vqs, mvdev->max_vqs); + + cmds = kvcalloc(num_vqs, sizeof(*cmds), GFP_KERNEL); + cmd_mem = kvcalloc(num_vqs, sizeof(*cmd_mem), GFP_KERNEL); + if (!cmds || !cmd_mem) { + err = -ENOMEM; + goto done; + } + for (int i = 0; i < num_vqs; i++) { + struct mlx5_vdpa_async_cmd *cmd = &cmds[i]; + struct mlx5_vdpa_virtqueue *mvq; + int vq_idx = start_vq + i; + + mvq = &ndev->vqs[vq_idx]; + + if (!modifiable_virtqueue_fields(mvq)) { + err = -EINVAL; + goto done; + } + + if (mvq->fw_state != state) { + if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) { + err = -EINVAL; + goto done; + } + + mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE; + } + + cmd->in = &cmd_mem[i].in; + cmd->inlen = sizeof(cmd_mem[i].in); + cmd->out = &cmd_mem[i].out; + cmd->outlen = sizeof(cmd_mem[i].out); + fill_modify_virtqueue_cmd(ndev, mvq, state, &cmd_mem[i]); + } + + err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs); + if (err) { + mlx5_vdpa_err(mvdev, "error issuing modify cmd for vq range [%d, %d)\n", + start_vq, start_vq + num_vqs); + goto done; + } + + for (int i = 0; i < num_vqs; i++) { + struct mlx5_vdpa_async_cmd *cmd = &cmds[i]; + struct mlx5_vdpa_virtqueue *mvq; + int vq_idx = start_vq + i; + + mvq = &ndev->vqs[vq_idx]; + + if (cmd->err) { + mlx5_vdpa_err(mvdev, "modify vq %d failed, state: %d -> %d, err: %d\n", + vq_idx, mvq->fw_state, state, err); + if (!err) + err = cmd->err; + continue; + } + + modify_virtqueue_end(ndev, mvq, state); + } + +done: + kvfree(cmd_mem); + kvfree(cmds); + return err; +} + +static int suspend_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs) +{ + struct mlx5_vdpa_virtqueue *mvq; + struct mlx5_virtq_attr *attrs; + int vq_idx, i; + int err; + + if (start_vq >= ndev->cur_num_vqs) + return -EINVAL; + + mvq = &ndev->vqs[start_vq]; if (!mvq->initialized) - return; + return 0; if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) - return; + return 0; - if (modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)) - mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n"); + err = modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND); + if (err) + return err; - if (query_virtqueue(ndev, mvq, &attr)) { - mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n"); - return; + attrs = kcalloc(num_vqs, sizeof(struct mlx5_virtq_attr), GFP_KERNEL); + if (!attrs) + return -ENOMEM; + + err = query_virtqueues(ndev, start_vq, num_vqs, attrs); + if (err) + goto done; + + for (i = 0, vq_idx = start_vq; i < num_vqs; i++, vq_idx++) { + mvq = &ndev->vqs[vq_idx]; + mvq->avail_idx = attrs[i].available_index; + mvq->used_idx = attrs[i].used_index; } - mvq->avail_idx = attr.available_index; - mvq->used_idx = attr.used_index; + +done: + kfree(attrs); + return err; } -static void suspend_vqs(struct mlx5_vdpa_net *ndev) +static int suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) { - int i; - - for (i = 0; i < ndev->mvdev.max_vqs; i++) - suspend_vq(ndev, &ndev->vqs[i]); + return suspend_vqs(ndev, mvq->index, 1); } -static void resume_vq(struct mlx5_vdpa_net 
*ndev, struct mlx5_vdpa_virtqueue *mvq) +static int resume_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs) { - if (!mvq->initialized || !is_resumable(ndev)) - return; + struct mlx5_vdpa_virtqueue *mvq; + int err; - if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND) - return; + if (start_vq >= ndev->mvdev.max_vqs) + return -EINVAL; + + mvq = &ndev->vqs[start_vq]; + if (!mvq->initialized) + return 0; + + if (mvq->index >= ndev->cur_num_vqs) + return 0; + + switch (mvq->fw_state) { + case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT: + /* Due to a FW quirk we need to modify the VQ fields first then change state. + * This should be fixed soon. After that, a single command can be used. + */ + err = modify_virtqueues(ndev, start_vq, num_vqs, mvq->fw_state); + if (err) + return err; + break; + case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND: + if (!is_resumable(ndev)) { + mlx5_vdpa_warn(&ndev->mvdev, "vq %d is not resumable\n", mvq->index); + return -EINVAL; + } + break; + case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY: + return 0; + default: + mlx5_vdpa_err(&ndev->mvdev, "resume vq %u called from bad state %d\n", + mvq->index, mvq->fw_state); + return -EINVAL; + } - if (modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)) - mlx5_vdpa_warn(&ndev->mvdev, "modify to resume failed for vq %u\n", mvq->index); + return modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); } -static void resume_vqs(struct mlx5_vdpa_net *ndev) +static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) { - for (int i = 0; i < ndev->mvdev.max_vqs; i++) - resume_vq(ndev, &ndev->vqs[i]); + return resume_vqs(ndev, mvq->index, 1); } static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) @@ -1759,7 +1952,7 @@ static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, goto out_free; #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) - dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter); + dests[1].counter = node->ucast_counter.counter; #endif node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS); if (IS_ERR(node->ucast_rule)) { @@ -1768,7 +1961,7 @@ static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, } #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) - dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter); + dests[1].counter = node->mcast_counter.counter; #endif memset(dmac_c, 0, ETH_ALEN); @@ -1904,13 +2097,13 @@ static int setup_steering(struct mlx5_vdpa_net *ndev) ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); if (!ns) { - mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n"); + mlx5_vdpa_err(&ndev->mvdev, "failed to get flow namespace\n"); return -EOPNOTSUPP; } ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); if (IS_ERR(ndev->rxft)) { - mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n"); + mlx5_vdpa_err(&ndev->mvdev, "failed to create flow table\n"); return PTR_ERR(ndev->rxft); } mlx5_vdpa_add_rx_flow_table(ndev); @@ -2026,37 +2219,48 @@ static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps) { struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); - int cur_qps = ndev->cur_num_vqs / 2; + int cur_vqs = ndev->cur_num_vqs; + int new_vqs = newqps * 2; int err; int i; - if (cur_qps > newqps) { - err = modify_rqt(ndev, 2 * newqps); + if (cur_vqs > new_vqs) { + err = modify_rqt(ndev, new_vqs); if 
(err) return err; - for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--) - teardown_vq(ndev, &ndev->vqs[i]); + if (is_resumable(ndev)) { + suspend_vqs(ndev, new_vqs, cur_vqs - new_vqs); + } else { + for (i = new_vqs; i < cur_vqs; i++) + teardown_vq(ndev, &ndev->vqs[i]); + } - ndev->cur_num_vqs = 2 * newqps; + ndev->cur_num_vqs = new_vqs; } else { - ndev->cur_num_vqs = 2 * newqps; - for (i = cur_qps * 2; i < 2 * newqps; i++) { - err = setup_vq(ndev, &ndev->vqs[i]); + ndev->cur_num_vqs = new_vqs; + + for (i = cur_vqs; i < new_vqs; i++) { + err = setup_vq(ndev, &ndev->vqs[i], false); if (err) goto clean_added; } - err = modify_rqt(ndev, 2 * newqps); + + err = resume_vqs(ndev, cur_vqs, new_vqs - cur_vqs); + if (err) + goto clean_added; + + err = modify_rqt(ndev, new_vqs); if (err) goto clean_added; } return 0; clean_added: - for (--i; i >= 2 * cur_qps; --i) + for (--i; i >= cur_vqs; --i) teardown_vq(ndev, &ndev->vqs[i]); - ndev->cur_num_vqs = 2 * cur_qps; + ndev->cur_num_vqs = cur_vqs; return err; } @@ -2276,10 +2480,18 @@ static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); struct mlx5_vdpa_virtqueue *mvq; - if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) + if (!is_index_valid(mvdev, idx)) return; + if (is_ctrl_vq_idx(mvdev, idx)) { + struct mlx5_control_vq *cvq = &mvdev->cvq; + + cvq->vring.vring.num = num; + return; + } + mvq = &ndev->vqs[idx]; + ndev->needs_teardown = num != mvq->num_ent; mvq->num_ent = num; } @@ -2319,7 +2531,6 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); struct mlx5_vdpa_virtqueue *mvq; - int err; if (!mvdev->actual_features) return; @@ -2335,15 +2546,11 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready mvq = &ndev->vqs[idx]; if (!ready) { suspend_vq(ndev, mvq); - } else { - err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); - if (err) { - mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err); + } else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) { + if (resume_vq(ndev, mvq)) ready = false; - } } - mvq->ready = ready; } @@ -2419,9 +2626,9 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa return 0; } - err = query_virtqueue(ndev, mvq, &attr); + err = query_virtqueues(ndev, mvq->index, 1, &attr); if (err) { - mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n"); + mlx5_vdpa_err(mvdev, "failed to query virtqueue\n"); return err; } state->split.avail_index = attr.used_index; @@ -2531,14 +2738,14 @@ static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features) return 0; } -static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) +static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev, bool filled) { struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); int err; int i; for (i = 0; i < mvdev->max_vqs; i++) { - err = setup_vq(ndev, &ndev->vqs[i]); + err = setup_vq(ndev, &ndev->vqs[i], filled); if (err) goto err_vq; } @@ -2554,16 +2761,10 @@ err_vq: static void teardown_virtqueues(struct mlx5_vdpa_net *ndev) { - struct mlx5_vdpa_virtqueue *mvq; int i; - for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) { - mvq = &ndev->vqs[i]; - if (!mvq->initialized) - continue; - - teardown_vq(ndev, mvq); - } + for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) + teardown_vq(ndev, &ndev->vqs[i]); } static void update_cvq_info(struct 
mlx5_vdpa_dev *mvdev) @@ -2652,6 +2853,9 @@ static int event_handler(struct notifier_block *nb, unsigned long event, void *p struct mlx5_eqe *eqe = param; int ret = NOTIFY_DONE; + if (ndev->mvdev.suspended) + return NOTIFY_DONE; + if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { switch (eqe->sub_type) { case MLX5_PORT_CHANGE_SUBTYPE_DOWN: @@ -2700,6 +2904,8 @@ static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + u64 old_features = mvdev->actual_features; + u64 diff_features; int err; print_features(mvdev, features, true); @@ -2709,20 +2915,26 @@ static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) return err; ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; - if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)) - ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs); - else - ndev->rqt_size = 1; - /* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section - * 5.1.6.5.5 "Device operation in multiqueue mode": - * - * Multiqueue is disabled by default. - * The driver enables multiqueue by sending a command using class - * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue - * operation, as follows: ... - */ - ndev->cur_num_vqs = 2; + /* Interested in changes of vq features only. */ + if (get_features(old_features) != get_features(mvdev->actual_features)) { + for (int i = 0; i < mvdev->max_vqs; ++i) { + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[i]; + + mvq->modified_fields |= ( + MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION | + MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES + ); + } + } + + /* When below features diverge from initial device features, VQs need a full teardown. 
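The NEEDS_TEARDOWN_MASK check that follows reduces to a pure function of the feature bits: only bits that change how the device lays out and processes buffers (mergeable rx buffers, checksum offload, the VIRTIO 1.0 ring format) force a destroy/recreate cycle; everything else can be applied in place via modify. As a sketch, with stand-in parameter names:

/* Illustrative only: decide whether renegotiated features require a
 * full VQ teardown rather than an in-place modify.
 */
static bool my_vq_needs_teardown(u64 dev_features, u64 drv_features)
{
	const u64 mask = BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
			 BIT_ULL(VIRTIO_NET_F_CSUM) |
			 BIT_ULL(VIRTIO_F_VERSION_1);

	return !!((dev_features ^ drv_features) & mask);
}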
*/ +#define NEEDS_TEARDOWN_MASK (BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | \ + BIT_ULL(VIRTIO_NET_F_CSUM) | \ + BIT_ULL(VIRTIO_F_VERSION_1)) + + diff_features = mvdev->mlx_features ^ mvdev->actual_features; + ndev->needs_teardown = !!(diff_features & NEEDS_TEARDOWN_MASK); update_cvq_info(mvdev); return err; @@ -2768,7 +2980,7 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu int err; if (mvq->initialized) { - err = query_virtqueue(ndev, mvq, &attr); + err = query_virtqueues(ndev, mvq->index, 1, &attr); if (err) return err; } @@ -2811,7 +3023,7 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev) int i; mlx5_clear_vqs(ndev); - init_mvqs(ndev); + mvqs_set_defaults(ndev); for (i = 0; i < ndev->mvdev.max_vqs; i++) { mvq = &ndev->vqs[i]; ri = &mvq->ri; @@ -2837,18 +3049,18 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, bool teardown = !is_resumable(ndev); int err; - suspend_vqs(ndev); + suspend_vqs(ndev, 0, ndev->cur_num_vqs); if (teardown) { err = save_channels_info(ndev); if (err) return err; - teardown_driver(ndev); + teardown_vq_resources(ndev); } mlx5_vdpa_update_mr(mvdev, new_mr, asid); - for (int i = 0; i < ndev->cur_num_vqs; i++) + for (int i = 0; i < mvdev->max_vqs; i++) ndev->vqs[i].modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY | MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY; @@ -2857,20 +3069,20 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, if (teardown) { restore_channels_info(ndev); - err = setup_driver(mvdev); + err = setup_vq_resources(ndev, true); if (err) return err; } - resume_vqs(ndev); + resume_vqs(ndev, 0, ndev->cur_num_vqs); return 0; } /* reslock must be held for this function */ -static int setup_driver(struct mlx5_vdpa_dev *mvdev) +static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled) { - struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; int err; WARN_ON(!rwsem_is_locked(&ndev->reslock)); @@ -2886,7 +3098,7 @@ static int setup_driver(struct mlx5_vdpa_dev *mvdev) if (err) goto err_setup; - err = setup_virtqueues(mvdev); + err = setup_virtqueues(mvdev, filled); if (err) { mlx5_vdpa_warn(mvdev, "setup_virtqueues\n"); goto err_setup; @@ -2926,7 +3138,7 @@ out: } /* reslock must be held for this function */ -static void teardown_driver(struct mlx5_vdpa_net *ndev) +static void teardown_vq_resources(struct mlx5_vdpa_net *ndev) { WARN_ON(!rwsem_is_locked(&ndev->reslock)); @@ -2940,18 +3152,7 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev) destroy_rqt(ndev); teardown_virtqueues(ndev); ndev->setup = false; -} - -static void clear_vqs_ready(struct mlx5_vdpa_net *ndev) -{ - int i; - - for (i = 0; i < ndev->mvdev.max_vqs; i++) { - ndev->vqs[i].ready = false; - ndev->vqs[i].modified_fields = 0; - } - - ndev->mvdev.cvq.ready = false; + ndev->needs_teardown = false; } static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) @@ -2963,7 +3164,7 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) u16 idx = cvq->vring.last_avail_idx; err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, - MLX5_CVQ_MAX_ENT, false, + cvq->vring.vring.num, false, (struct vring_desc *)(uintptr_t)cvq->desc_addr, (struct vring_avail *)(uintptr_t)cvq->driver_addr, (struct vring_used *)(uintptr_t)cvq->device_addr); @@ -2992,10 +3193,22 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) goto err_setup; } register_link_notifier(ndev); - err = setup_driver(mvdev); - if (err) { - mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); - 
goto err_driver; + + if (ndev->needs_teardown) + teardown_vq_resources(ndev); + + if (ndev->setup) { + err = resume_vqs(ndev, 0, ndev->cur_num_vqs); + if (err) { + mlx5_vdpa_warn(mvdev, "failed to resume VQs\n"); + goto err_driver; + } + } else { + err = setup_vq_resources(ndev, true); + if (err) { + mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); + goto err_driver; + } } } else { mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n"); @@ -3010,7 +3223,7 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) err_driver: unregister_link_notifier(ndev); err_setup: - mlx5_vdpa_destroy_mr_resources(&ndev->mvdev); + mlx5_vdpa_clean_mrs(&ndev->mvdev); ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; err_clear: up_write(&ndev->reslock); @@ -3022,26 +3235,51 @@ static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev) /* default mapping all groups are mapped to asid 0 */ for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++) - mvdev->group2asid[i] = 0; + mvdev->mres.group2asid[i] = 0; +} + +static bool needs_vqs_reset(const struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[0]; + + if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) + return true; + + if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT) + return true; + + return mvq->modified_fields & ( + MLX5_VIRTQ_MODIFY_MASK_STATE | + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS | + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX | + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX + ); } static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + bool vq_reset; print_status(mvdev, 0, true); mlx5_vdpa_info(mvdev, "performing device reset\n"); down_write(&ndev->reslock); unregister_link_notifier(ndev); - teardown_driver(ndev); - clear_vqs_ready(ndev); + vq_reset = needs_vqs_reset(mvdev); + if (vq_reset) { + teardown_vq_resources(ndev); + mvqs_set_defaults(ndev); + } + if (flags & VDPA_RESET_F_CLEAN_MAP) - mlx5_vdpa_destroy_mr_resources(&ndev->mvdev); + mlx5_vdpa_clean_mrs(&ndev->mvdev); ndev->mvdev.status = 0; ndev->mvdev.suspended = false; - ndev->cur_num_vqs = 0; + ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT; + ndev->mvdev.cvq.ready = false; ndev->mvdev.cvq.received_desc = 0; ndev->mvdev.cvq.completed_desc = 0; memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1)); @@ -3052,8 +3290,10 @@ static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags) if ((flags & VDPA_RESET_F_CLEAN_MAP) && MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { if (mlx5_vdpa_create_dma_mr(mvdev)) - mlx5_vdpa_warn(mvdev, "create MR failed\n"); + mlx5_vdpa_err(mvdev, "create MR failed\n"); } + if (vq_reset) + setup_vq_resources(ndev, false); up_write(&ndev->reslock); return 0; @@ -3105,7 +3345,7 @@ static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, new_mr = mlx5_vdpa_create_mr(mvdev, iotlb); if (IS_ERR(new_mr)) { err = PTR_ERR(new_mr); - mlx5_vdpa_warn(mvdev, "create map failed(%d)\n", err); + mlx5_vdpa_err(mvdev, "create map failed(%d)\n", err); return err; } } else { @@ -3113,12 +3353,12 @@ static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, new_mr = NULL; } - if (!mvdev->mr[asid]) { + if (!mvdev->mres.mr[asid]) { mlx5_vdpa_update_mr(mvdev, new_mr, asid); } else { err = mlx5_vdpa_change_map(mvdev, new_mr, asid); if (err) { - mlx5_vdpa_warn(mvdev, "change map failed(%d)\n", err); + 
mlx5_vdpa_err(mvdev, "change map failed(%d)\n", err); goto out_err; } } @@ -3192,8 +3432,11 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev) ndev = to_mlx5_vdpa_ndev(mvdev); - free_resources(ndev); - mlx5_vdpa_destroy_mr_resources(mvdev); + free_fixed_resources(ndev); + mlx5_vdpa_clean_mrs(mvdev); + mlx5_vdpa_destroy_mr_resources(&ndev->mvdev); + mlx5_cmd_cleanup_async_ctx(&mvdev->async_ctx); + if (!is_zero_ether_addr(ndev->config.mac)) { pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); @@ -3356,27 +3599,24 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); - struct mlx5_vdpa_virtqueue *mvq; - int i; + int err; mlx5_vdpa_info(mvdev, "suspending device\n"); down_write(&ndev->reslock); - unregister_link_notifier(ndev); - for (i = 0; i < ndev->cur_num_vqs; i++) { - mvq = &ndev->vqs[i]; - suspend_vq(ndev, mvq); - } + err = suspend_vqs(ndev, 0, ndev->cur_num_vqs); mlx5_vdpa_cvq_suspend(mvdev); mvdev->suspended = true; up_write(&ndev->reslock); - return 0; + + return err; } static int mlx5_vdpa_resume(struct vdpa_device *vdev) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev; + int err; ndev = to_mlx5_vdpa_ndev(mvdev); @@ -3384,10 +3624,11 @@ static int mlx5_vdpa_resume(struct vdpa_device *vdev) down_write(&ndev->reslock); mvdev->suspended = false; - resume_vqs(ndev); - register_link_notifier(ndev); + err = resume_vqs(ndev, 0, ndev->cur_num_vqs); + queue_link_work(ndev); up_write(&ndev->reslock); - return 0; + + return err; } static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group, @@ -3399,12 +3640,12 @@ static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group, if (group >= MLX5_VDPA_NUMVQ_GROUPS) return -EINVAL; - mvdev->group2asid[group] = asid; + mvdev->mres.group2asid[group] = asid; - mutex_lock(&mvdev->mr_mtx); - if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mr[asid]) - err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mr[asid]->iotlb, asid); - mutex_unlock(&mvdev->mr_mtx); + mutex_lock(&mvdev->mres.lock); + if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mres.mr[asid]) + err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mres.mr[asid]->iotlb, asid); + mutex_unlock(&mvdev->mres.lock); return err; } @@ -3462,7 +3703,7 @@ static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu) return 0; } -static int alloc_resources(struct mlx5_vdpa_net *ndev) +static int alloc_fixed_resources(struct mlx5_vdpa_net *ndev) { struct mlx5_vdpa_net_resources *res = &ndev->res; int err; @@ -3489,7 +3730,7 @@ err_tis: return err; } -static void free_resources(struct mlx5_vdpa_net *ndev) +static void free_fixed_resources(struct mlx5_vdpa_net *ndev) { struct mlx5_vdpa_net_resources *res = &ndev->res; @@ -3501,7 +3742,7 @@ static void free_resources(struct mlx5_vdpa_net *ndev) res->valid = false; } -static void init_mvqs(struct mlx5_vdpa_net *ndev) +static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev) { struct mlx5_vdpa_virtqueue *mvq; int i; @@ -3513,12 +3754,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev) mvq->ndev = ndev; mvq->fwqp.fw = true; mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; - } - for (; i < ndev->mvdev.max_vqs; i++) { - mvq = &ndev->vqs[i]; - memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); - mvq->index = i; - mvq->ndev = ndev; + mvq->num_ent = MLX5V_DEFAULT_VQ_SIZE; } } @@ -3655,8 +3891,9 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, err = 
-ENOMEM; goto err_alloc; } + ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT; - init_mvqs(ndev); + mvqs_set_defaults(ndev); allocate_irqs(ndev); init_rwsem(&ndev->reslock); config = &ndev->config; @@ -3713,33 +3950,41 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, goto err_alloc; } - if (device_features & BIT_ULL(VIRTIO_NET_F_MQ)) + if (device_features & BIT_ULL(VIRTIO_NET_F_MQ)) { config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2); + ndev->rqt_size = max_vqs / 2; + } else { + ndev->rqt_size = 1; + } + + mlx5_cmd_init_async_ctx(mdev, &mvdev->async_ctx); ndev->mvdev.mlx_features = device_features; mvdev->vdev.dma_dev = &mdev->pdev->dev; err = mlx5_vdpa_alloc_resources(&ndev->mvdev); if (err) - goto err_mpfs; + goto err_alloc; - INIT_LIST_HEAD(&mvdev->mr_list_head); + err = mlx5_vdpa_init_mr_resources(mvdev); + if (err) + goto err_alloc; if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { err = mlx5_vdpa_create_dma_mr(mvdev); if (err) - goto err_res; + goto err_alloc; } - err = alloc_resources(ndev); + err = alloc_fixed_resources(ndev); if (err) - goto err_mr; + goto err_alloc; ndev->cvq_ent.mvdev = mvdev; INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler); mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq"); if (!mvdev->wq) { err = -ENOMEM; - goto err_res2; + goto err_alloc; } mvdev->vdev.mdev = &mgtdev->mgtdev; @@ -3748,19 +3993,23 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, goto err_reg; mgtdev->ndev = ndev; + + /* For virtio-vdpa, the device was set up during device register. */ + if (ndev->setup) + return 0; + + down_write(&ndev->reslock); + err = setup_vq_resources(ndev, false); + up_write(&ndev->reslock); + if (err) + goto err_setup_vq_res; + return 0; +err_setup_vq_res: + _vdpa_unregister_device(&mvdev->vdev); err_reg: destroy_workqueue(mvdev->wq); -err_res2: - free_resources(ndev); -err_mr: - mlx5_vdpa_destroy_mr_resources(mvdev); -err_res: - mlx5_vdpa_free_resources(&ndev->mvdev); -err_mpfs: - if (!is_zero_ether_addr(config->mac)) - mlx5_mpfs_del_mac(pfmdev, config->mac); err_alloc: put_device(&mvdev->vdev.dev); return err; @@ -3775,15 +4024,48 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device * unregister_link_notifier(ndev); _vdpa_unregister_device(dev); + + down_write(&ndev->reslock); + teardown_vq_resources(ndev); + up_write(&ndev->reslock); + wq = mvdev->wq; mvdev->wq = NULL; destroy_workqueue(wq); mgtdev->ndev = NULL; } +static int mlx5_vdpa_set_attr(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev, + const struct vdpa_dev_set_config *add_config) +{ + struct virtio_net_config *config; + struct mlx5_core_dev *pfmdev; + struct mlx5_vdpa_dev *mvdev; + struct mlx5_vdpa_net *ndev; + struct mlx5_core_dev *mdev; + int err = -EOPNOTSUPP; + + mvdev = to_mvdev(dev); + ndev = to_mlx5_vdpa_ndev(mvdev); + mdev = mvdev->mdev; + config = &ndev->config; + + down_write(&ndev->reslock); + if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) { + pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev)); + err = mlx5_mpfs_add_mac(pfmdev, config->mac); + if (!err) + ether_addr_copy(config->mac, add_config->net.mac); + } + + up_write(&ndev->reslock); + return err; +} + static const struct vdpa_mgmtdev_ops mdev_ops = { .dev_add = mlx5_vdpa_dev_add, .dev_del = mlx5_vdpa_dev_del, + .dev_set_attr = mlx5_vdpa_set_attr, }; static struct virtio_device_id id_table[] = { diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.h b/drivers/vdpa/mlx5/net/mlx5_vnet.h index 90b556a57971..00e79a7d0be8 
100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.h +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.h @@ -56,6 +56,7 @@ struct mlx5_vdpa_net { struct dentry *rx_dent; struct dentry *rx_table_dent; bool setup; + bool needs_teardown; u32 cur_num_vqs; u32 rqt_size; bool nb_registered; diff --git a/drivers/vdpa/octeon_ep/Makefile b/drivers/vdpa/octeon_ep/Makefile new file mode 100644 index 000000000000..e23e2ff14f33 --- /dev/null +++ b/drivers/vdpa/octeon_ep/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_OCTEONEP_VDPA) += octep_vdpa.o +octep_vdpa-$(CONFIG_OCTEONEP_VDPA) += octep_vdpa_main.o +octep_vdpa-$(CONFIG_OCTEONEP_VDPA) += octep_vdpa_hw.o diff --git a/drivers/vdpa/octeon_ep/octep_vdpa.h b/drivers/vdpa/octeon_ep/octep_vdpa.h new file mode 100644 index 000000000000..53b020b019f7 --- /dev/null +++ b/drivers/vdpa/octeon_ep/octep_vdpa.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2024 Marvell. + */ +#ifndef __OCTEP_VDPA_H__ +#define __OCTEP_VDPA_H__ + +#include <linux/pci.h> +#include <linux/pci_regs.h> +#include <linux/vdpa.h> +#include <linux/virtio_pci_modern.h> +#include <uapi/linux/virtio_crypto.h> +#include <uapi/linux/virtio_net.h> +#include <uapi/linux/virtio_blk.h> +#include <uapi/linux/virtio_config.h> +#include <uapi/linux/virtio_pci.h> +#include <uapi/linux/vdpa.h> + +#define OCTEP_VDPA_DEVID_CN106K_PF 0xb900 +#define OCTEP_VDPA_DEVID_CN106K_VF 0xb903 +#define OCTEP_VDPA_DEVID_CN105K_PF 0xba00 +#define OCTEP_VDPA_DEVID_CN105K_VF 0xba03 +#define OCTEP_VDPA_DEVID_CN103K_PF 0xbd00 +#define OCTEP_VDPA_DEVID_CN103K_VF 0xbd03 + +#define OCTEP_HW_MBOX_BAR 0 +#define OCTEP_HW_CAPS_BAR 4 + +#define OCTEP_DEV_READY_SIGNATURE 0xBABABABA + +#define OCTEP_EPF_RINFO(x) (0x000209f0 | ((x) << 25)) +#define OCTEP_VF_MBOX_DATA(x) (0x00010210 | ((x) << 17)) +#define OCTEP_PF_MBOX_DATA(x) (0x00022000 | ((x) << 4)) +#define OCTEP_VF_IN_CTRL(x) (0x00010000 | ((x) << 17)) +#define OCTEP_VF_IN_CTRL_RPVF(val) (((val) >> 48) & 0xF) + +#define OCTEP_FW_READY_SIGNATURE0 0xFEEDFEED +#define OCTEP_FW_READY_SIGNATURE1 0x3355ffaa +#define OCTEP_MAX_CB_INTR 8 + +enum octep_vdpa_dev_status { + OCTEP_VDPA_DEV_STATUS_INVALID, + OCTEP_VDPA_DEV_STATUS_ALLOC, + OCTEP_VDPA_DEV_STATUS_WAIT_FOR_BAR_INIT, + OCTEP_VDPA_DEV_STATUS_INIT, + OCTEP_VDPA_DEV_STATUS_READY, + OCTEP_VDPA_DEV_STATUS_UNINIT +}; + +struct octep_vring_info { + struct vdpa_callback cb; + void __iomem *notify_addr; + void __iomem *cb_notify_addr; + phys_addr_t notify_pa; +}; + +enum octep_pci_vndr_cfg_type { + OCTEP_PCI_VNDR_CFG_TYPE_VIRTIO_ID, + OCTEP_PCI_VNDR_CFG_TYPE_MAX, +}; + +struct octep_pci_vndr_data { + struct virtio_pci_vndr_data hdr; + u8 id; + u8 bar; + union { + u64 data; + struct { + u32 offset; + u32 length; + }; + }; +}; + +struct octep_hw { + struct pci_dev *pdev; + u8 __iomem *base[PCI_STD_NUM_BARS]; + struct virtio_pci_common_cfg __iomem *common_cfg; + u8 __iomem *dev_cfg; + u8 __iomem *isr; + void __iomem *notify_base; + phys_addr_t notify_base_pa; + u32 notify_off_multiplier; + u8 notify_bar; + struct octep_vring_info *vqs; + struct vdpa_callback config_cb; + u64 features; + u16 nr_vring; + u32 config_size; + int nb_irqs; + int *irqs; + u8 dev_id; +}; + +u8 octep_hw_get_status(struct octep_hw *oct_hw); +void octep_hw_set_status(struct octep_hw *dev, uint8_t status); +void octep_hw_reset(struct octep_hw *oct_hw); +void octep_write_queue_select(struct octep_hw *oct_hw, u16 queue_id); +void octep_notify_queue(struct octep_hw *oct_hw, u16 qid); +void octep_read_dev_config(struct octep_hw 
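
A note on the register macros in octep_vdpa.h above: the VF-side offsets encode the VF index in high address bits, so each VF sees its own fixed-stride register window, while the PF-side mailbox slots are packed tightly. A worked reading of the definitions (editorial sketch, derived only from the #defines shown, not part of the patch):

/*
 * Sketch only, from the macros above:
 *
 *   OCTEP_VF_MBOX_DATA(0) = 0x00010210
 *   OCTEP_VF_MBOX_DATA(2) = 0x00010210 | (2 << 17) = 0x00050210
 *
 * so consecutive VF mailbox windows sit 1 << 17 = 128 KiB apart in the
 * mailbox BAR, whereas the PF-side slots use (x << 4), i.e. only
 * 16 bytes per VF.
 */
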
*oct_hw, u64 offset, void *dst, int length); +int octep_set_vq_address(struct octep_hw *oct_hw, u16 qid, u64 desc_area, u64 driver_area, + u64 device_area); +void octep_set_vq_num(struct octep_hw *oct_hw, u16 qid, u32 num); +void octep_set_vq_ready(struct octep_hw *oct_hw, u16 qid, bool ready); +bool octep_get_vq_ready(struct octep_hw *oct_hw, u16 qid); +int octep_set_vq_state(struct octep_hw *oct_hw, u16 qid, const struct vdpa_vq_state *state); +int octep_get_vq_state(struct octep_hw *oct_hw, u16 qid, struct vdpa_vq_state *state); +u16 octep_get_vq_size(struct octep_hw *oct_hw); +int octep_hw_caps_read(struct octep_hw *oct_hw, struct pci_dev *pdev); +u64 octep_hw_get_dev_features(struct octep_hw *oct_hw); +void octep_hw_set_drv_features(struct octep_hw *oct_hw, u64 features); +u64 octep_hw_get_drv_features(struct octep_hw *oct_hw); +int octep_verify_features(u64 features); + +#endif /* __OCTEP_VDPA_H__ */ diff --git a/drivers/vdpa/octeon_ep/octep_vdpa_hw.c b/drivers/vdpa/octeon_ep/octep_vdpa_hw.c new file mode 100644 index 000000000000..74240101c505 --- /dev/null +++ b/drivers/vdpa/octeon_ep/octep_vdpa_hw.c @@ -0,0 +1,549 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2024 Marvell. */ + +#include <linux/iopoll.h> +#include <linux/build_bug.h> + +#include "octep_vdpa.h" + +enum octep_mbox_ids { + OCTEP_MBOX_MSG_SET_VQ_STATE = 1, + OCTEP_MBOX_MSG_GET_VQ_STATE, +}; + +#define OCTEP_HW_TIMEOUT 10000000 + +#define MBOX_OFFSET 64 +#define MBOX_RSP_MASK 0x00000001 +#define MBOX_RC_MASK 0x0000FFFE + +#define MBOX_RSP_TO_ERR(val) (-(((val) & MBOX_RC_MASK) >> 2)) +#define MBOX_AVAIL(val) (((val) & MBOX_RSP_MASK)) +#define MBOX_RSP(val) ((val) & (MBOX_RC_MASK | MBOX_RSP_MASK)) + +#define DEV_RST_ACK_BIT 7 +#define FEATURE_SEL_ACK_BIT 15 +#define QUEUE_SEL_ACK_BIT 15 + +struct octep_mbox_hdr { + u8 ver; + u8 rsvd1; + u16 id; + u16 rsvd2; +#define MBOX_REQ_SIG (0xdead) +#define MBOX_RSP_SIG (0xbeef) + u16 sig; +}; + +struct octep_mbox_sts { + u16 rsp:1; + u16 rc:15; + u16 rsvd; +}; + +struct octep_mbox { + struct octep_mbox_hdr hdr; + struct octep_mbox_sts sts; + u64 rsvd; + u32 data[]; +}; + +static inline struct octep_mbox __iomem *octep_get_mbox(struct octep_hw *oct_hw) +{ + return (struct octep_mbox __iomem *)(oct_hw->dev_cfg + MBOX_OFFSET); +} + +static inline int octep_wait_for_mbox_avail(struct octep_mbox __iomem *mbox) +{ + u32 val; + + return readx_poll_timeout(ioread32, &mbox->sts, val, MBOX_AVAIL(val), 10, + OCTEP_HW_TIMEOUT); +} + +static inline int octep_wait_for_mbox_rsp(struct octep_mbox __iomem *mbox) +{ + u32 val; + + return readx_poll_timeout(ioread32, &mbox->sts, val, MBOX_RSP(val), 10, + OCTEP_HW_TIMEOUT); +} + +static inline void octep_write_hdr(struct octep_mbox __iomem *mbox, u16 id, u16 sig) +{ + iowrite16(id, &mbox->hdr.id); + iowrite16(sig, &mbox->hdr.sig); +} + +static inline u32 octep_read_sig(struct octep_mbox __iomem *mbox) +{ + return ioread16(&mbox->hdr.sig); +} + +static inline void octep_write_sts(struct octep_mbox __iomem *mbox, u32 sts) +{ + iowrite32(sts, &mbox->sts); +} + +static inline u32 octep_read_sts(struct octep_mbox __iomem *mbox) +{ + return ioread32(&mbox->sts); +} + +static inline u32 octep_read32_word(struct octep_mbox __iomem *mbox, u16 word_idx) +{ + return ioread32(&mbox->data[word_idx]); +} + +static inline void octep_write32_word(struct octep_mbox __iomem *mbox, u16 word_idx, u32 word) +{ + return iowrite32(word, &mbox->data[word_idx]); +} + +static int octep_process_mbox(struct octep_hw *oct_hw, u16 id, u16 qid, void *buffer, + u32 
buf_size, bool write) +{ + struct octep_mbox __iomem *mbox = octep_get_mbox(oct_hw); + struct pci_dev *pdev = oct_hw->pdev; + u32 *p = (u32 *)buffer; + u16 data_wds; + int ret, i; + u32 val; + + if (!IS_ALIGNED(buf_size, 4)) + return -EINVAL; + + /* Make sure mbox space is available */ + ret = octep_wait_for_mbox_avail(mbox); + if (ret) { + dev_warn(&pdev->dev, "Timeout waiting for previous mbox data to be consumed\n"); + return ret; + } + data_wds = buf_size / 4; + + if (write) { + for (i = 1; i <= data_wds; i++) { + octep_write32_word(mbox, i, *p); + p++; + } + } + octep_write32_word(mbox, 0, (u32)qid); + octep_write_sts(mbox, 0); + + octep_write_hdr(mbox, id, MBOX_REQ_SIG); + + ret = octep_wait_for_mbox_rsp(mbox); + if (ret) { + dev_warn(&pdev->dev, "Timeout waiting for mbox : %d response\n", id); + return ret; + } + + val = octep_read_sig(mbox); + if ((val & 0xFFFF) != MBOX_RSP_SIG) { + dev_warn(&pdev->dev, "Invalid Signature from mbox : %d response\n", id); + return -EINVAL; + } + + val = octep_read_sts(mbox); + if (val & MBOX_RC_MASK) { + ret = MBOX_RSP_TO_ERR(val); + dev_warn(&pdev->dev, "Error while processing mbox : %d, err %d\n", id, ret); + return ret; + } + + if (!write) + for (i = 1; i <= data_wds; i++) + *p++ = octep_read32_word(mbox, i); + + return 0; +} + +static void octep_mbox_init(struct octep_mbox __iomem *mbox) +{ + iowrite32(1, &mbox->sts); +} + +int octep_verify_features(u64 features) +{ + /* Minimum features to expect */ + if (!(features & BIT_ULL(VIRTIO_F_VERSION_1))) + return -EOPNOTSUPP; + + if (!(features & BIT_ULL(VIRTIO_F_NOTIFICATION_DATA))) + return -EOPNOTSUPP; + + if (!(features & BIT_ULL(VIRTIO_F_RING_PACKED))) + return -EOPNOTSUPP; + + return 0; +} + +u8 octep_hw_get_status(struct octep_hw *oct_hw) +{ + return ioread8(&oct_hw->common_cfg->device_status); +} + +void octep_hw_set_status(struct octep_hw *oct_hw, u8 status) +{ + iowrite8(status, &oct_hw->common_cfg->device_status); +} + +void octep_hw_reset(struct octep_hw *oct_hw) +{ + u8 val; + + octep_hw_set_status(oct_hw, 0 | BIT(DEV_RST_ACK_BIT)); + if (readx_poll_timeout(ioread8, &oct_hw->common_cfg->device_status, val, !val, 10, + OCTEP_HW_TIMEOUT)) { + dev_warn(&oct_hw->pdev->dev, "Octeon device reset timeout\n"); + return; + } +} + +static int feature_sel_write_with_timeout(struct octep_hw *oct_hw, u32 select, void __iomem *addr) +{ + u32 val; + + iowrite32(select | BIT(FEATURE_SEL_ACK_BIT), addr); + + if (readx_poll_timeout(ioread32, addr, val, val == select, 10, OCTEP_HW_TIMEOUT)) { + dev_warn(&oct_hw->pdev->dev, "Feature select%d write timeout\n", select); + return -1; + } + return 0; +} + +u64 octep_hw_get_dev_features(struct octep_hw *oct_hw) +{ + u32 features_lo, features_hi; + + if (feature_sel_write_with_timeout(oct_hw, 0, &oct_hw->common_cfg->device_feature_select)) + return 0; + + features_lo = ioread32(&oct_hw->common_cfg->device_feature); + + if (feature_sel_write_with_timeout(oct_hw, 1, &oct_hw->common_cfg->device_feature_select)) + return 0; + + features_hi = ioread32(&oct_hw->common_cfg->device_feature); + + return ((u64)features_hi << 32) | features_lo; +} + +u64 octep_hw_get_drv_features(struct octep_hw *oct_hw) +{ + u32 features_lo, features_hi; + + if (feature_sel_write_with_timeout(oct_hw, 0, &oct_hw->common_cfg->guest_feature_select)) + return 0; + + features_lo = ioread32(&oct_hw->common_cfg->guest_feature); + + if (feature_sel_write_with_timeout(oct_hw, 1, &oct_hw->common_cfg->guest_feature_select)) + return 0; + + features_hi = ioread32(&oct_hw->common_cfg->guest_feature); + 
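
The feature helpers here all use the same select/ack handshake. As a usage sketch (hypothetical caller, not part of the patch; it calls only functions declared in octep_vdpa.h above), a probe path would read, validate, and accept the device features like so:

/* Hypothetical probe-time sketch: read device features via the
 * select/ack handshake, refuse devices lacking the mandatory bits,
 * then accept the full feature set as the driver features.
 */
static int octep_negotiate_features_sketch(struct octep_hw *oct_hw)
{
	u64 features = octep_hw_get_dev_features(oct_hw);
	int ret;

	ret = octep_verify_features(features);
	if (ret) {
		dev_err(&oct_hw->pdev->dev,
			"missing VERSION_1/NOTIFICATION_DATA/RING_PACKED\n");
		return ret;
	}

	octep_hw_set_drv_features(oct_hw, features);

	/* Read back to confirm the write took effect. */
	return octep_hw_get_drv_features(oct_hw) == features ? 0 : -EIO;
}
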
+ return ((u64)features_hi << 32) | features_lo; +} + +void octep_hw_set_drv_features(struct octep_hw *oct_hw, u64 features) +{ + if (feature_sel_write_with_timeout(oct_hw, 0, &oct_hw->common_cfg->guest_feature_select)) + return; + + iowrite32(features & (BIT_ULL(32) - 1), &oct_hw->common_cfg->guest_feature); + + if (feature_sel_write_with_timeout(oct_hw, 1, &oct_hw->common_cfg->guest_feature_select)) + return; + + iowrite32(features >> 32, &oct_hw->common_cfg->guest_feature); +} + +void octep_write_queue_select(struct octep_hw *oct_hw, u16 queue_id) +{ + u16 val; + + iowrite16(queue_id | BIT(QUEUE_SEL_ACK_BIT), &oct_hw->common_cfg->queue_select); + + if (readx_poll_timeout(ioread16, &oct_hw->common_cfg->queue_select, val, val == queue_id, + 10, OCTEP_HW_TIMEOUT)) { + dev_warn(&oct_hw->pdev->dev, "Queue select write timeout\n"); + return; + } +} + +void octep_notify_queue(struct octep_hw *oct_hw, u16 qid) +{ + iowrite16(qid, oct_hw->vqs[qid].notify_addr); +} + +void octep_read_dev_config(struct octep_hw *oct_hw, u64 offset, void *dst, int length) +{ + u8 old_gen, new_gen, *p; + int i; + + if (WARN_ON(offset + length > oct_hw->config_size)) + return; + + do { + old_gen = ioread8(&oct_hw->common_cfg->config_generation); + p = dst; + for (i = 0; i < length; i++) + *p++ = ioread8(oct_hw->dev_cfg + offset + i); + + new_gen = ioread8(&oct_hw->common_cfg->config_generation); + } while (old_gen != new_gen); +} + +int octep_set_vq_address(struct octep_hw *oct_hw, u16 qid, u64 desc_area, u64 driver_area, + u64 device_area) +{ + struct virtio_pci_common_cfg __iomem *cfg = oct_hw->common_cfg; + + octep_write_queue_select(oct_hw, qid); + vp_iowrite64_twopart(desc_area, &cfg->queue_desc_lo, + &cfg->queue_desc_hi); + vp_iowrite64_twopart(driver_area, &cfg->queue_avail_lo, + &cfg->queue_avail_hi); + vp_iowrite64_twopart(device_area, &cfg->queue_used_lo, + &cfg->queue_used_hi); + + return 0; +} + +int octep_get_vq_state(struct octep_hw *oct_hw, u16 qid, struct vdpa_vq_state *state) +{ + return octep_process_mbox(oct_hw, OCTEP_MBOX_MSG_GET_VQ_STATE, qid, state, + sizeof(*state), 0); +} + +int octep_set_vq_state(struct octep_hw *oct_hw, u16 qid, const struct vdpa_vq_state *state) +{ + struct vdpa_vq_state q_state; + + memcpy(&q_state, state, sizeof(struct vdpa_vq_state)); + return octep_process_mbox(oct_hw, OCTEP_MBOX_MSG_SET_VQ_STATE, qid, &q_state, + sizeof(*state), 1); +} + +void octep_set_vq_num(struct octep_hw *oct_hw, u16 qid, u32 num) +{ + struct virtio_pci_common_cfg __iomem *cfg = oct_hw->common_cfg; + + octep_write_queue_select(oct_hw, qid); + iowrite16(num, &cfg->queue_size); +} + +void octep_set_vq_ready(struct octep_hw *oct_hw, u16 qid, bool ready) +{ + struct virtio_pci_common_cfg __iomem *cfg = oct_hw->common_cfg; + + octep_write_queue_select(oct_hw, qid); + iowrite16(ready, &cfg->queue_enable); +} + +bool octep_get_vq_ready(struct octep_hw *oct_hw, u16 qid) +{ + struct virtio_pci_common_cfg __iomem *cfg = oct_hw->common_cfg; + + octep_write_queue_select(oct_hw, qid); + return ioread16(&cfg->queue_enable); +} + +u16 octep_get_vq_size(struct octep_hw *oct_hw) +{ + octep_write_queue_select(oct_hw, 0); + return ioread16(&oct_hw->common_cfg->queue_size); +} + +static u32 octep_get_config_size(struct octep_hw *oct_hw) +{ + switch (oct_hw->dev_id) { + case VIRTIO_ID_NET: + return sizeof(struct virtio_net_config); + case VIRTIO_ID_CRYPTO: + return sizeof(struct virtio_crypto_config); + default: + return 0; + } +} + +static void __iomem *octep_get_cap_addr(struct octep_hw *oct_hw, struct 
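
octep_read_dev_config() below gives callers tear-free reads of the firmware-emulated config space by retrying until config_generation is stable across the copy. A minimal caller sketch (hypothetical helper, not in the patch; assumes a net device so struct virtio_net_config applies, and little-endian config since VIRTIO_F_VERSION_1 is mandatory here):

/* Hypothetical sketch: fetch the MTU field as one untorn snapshot. */
static u16 octep_read_net_mtu_sketch(struct octep_hw *oct_hw)
{
	__virtio16 mtu;

	octep_read_dev_config(oct_hw,
			      offsetof(struct virtio_net_config, mtu),
			      &mtu, sizeof(mtu));
	/* 'true': VERSION_1 is required, so config is little-endian. */
	return __virtio16_to_cpu(true, mtu);
}
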
virtio_pci_cap *cap) +{ + struct device *dev = &oct_hw->pdev->dev; + u32 length = le32_to_cpu(cap->length); + u32 offset = le32_to_cpu(cap->offset); + u8 bar = cap->bar; + u32 len; + + if (bar != OCTEP_HW_CAPS_BAR) { + dev_err(dev, "Invalid bar: %u\n", bar); + return NULL; + } + if (offset + length < offset) { + dev_err(dev, "offset(%u) + length(%u) overflows\n", + offset, length); + return NULL; + } + len = pci_resource_len(oct_hw->pdev, bar); + if (offset + length > len) { + dev_err(dev, "invalid cap: overflows bar space: %u > %u\n", + offset + length, len); + return NULL; + } + return oct_hw->base[bar] + offset; +} + +/* In Octeon DPU device, the virtio config space is completely + * emulated by the device's firmware. So, the standard pci config + * read apis can't be used for reading the virtio capability. + */ +static void octep_pci_caps_read(struct octep_hw *oct_hw, void *buf, size_t len, off_t offset) +{ + u8 __iomem *bar = oct_hw->base[OCTEP_HW_CAPS_BAR]; + u8 *p = buf; + size_t i; + + for (i = 0; i < len; i++) + *p++ = ioread8(bar + offset + i); +} + +static int octep_pci_signature_verify(struct octep_hw *oct_hw) +{ + u32 signature[2]; + + octep_pci_caps_read(oct_hw, &signature, sizeof(signature), 0); + + if (signature[0] != OCTEP_FW_READY_SIGNATURE0) + return -1; + + if (signature[1] != OCTEP_FW_READY_SIGNATURE1) + return -1; + + return 0; +} + +static void octep_vndr_data_process(struct octep_hw *oct_hw, + struct octep_pci_vndr_data *vndr_data) +{ + BUILD_BUG_ON(sizeof(struct octep_pci_vndr_data) % 4 != 0); + + switch (vndr_data->id) { + case OCTEP_PCI_VNDR_CFG_TYPE_VIRTIO_ID: + oct_hw->dev_id = (u8)vndr_data->data; + break; + default: + dev_err(&oct_hw->pdev->dev, "Invalid vendor data id %u\n", + vndr_data->id); + break; + } +} + +int octep_hw_caps_read(struct octep_hw *oct_hw, struct pci_dev *pdev) +{ + struct octep_pci_vndr_data vndr_data; + struct octep_mbox __iomem *mbox; + struct device *dev = &pdev->dev; + struct virtio_pci_cap cap; + u16 notify_off; + int i, ret; + u8 pos; + + oct_hw->pdev = pdev; + ret = octep_pci_signature_verify(oct_hw); + if (ret) { + dev_err(dev, "Octeon Virtio FW is not initialized\n"); + return -EIO; + } + + octep_pci_caps_read(oct_hw, &pos, 1, PCI_CAPABILITY_LIST); + + while (pos) { + octep_pci_caps_read(oct_hw, &cap, 2, pos); + + if (cap.cap_vndr != PCI_CAP_ID_VNDR) { + dev_err(dev, "Found invalid capability vndr id: %d\n", cap.cap_vndr); + break; + } + + octep_pci_caps_read(oct_hw, &cap, sizeof(cap), pos); + + dev_info(dev, "[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u\n", + pos, cap.cfg_type, cap.bar, cap.offset, cap.length); + + switch (cap.cfg_type) { + case VIRTIO_PCI_CAP_COMMON_CFG: + oct_hw->common_cfg = octep_get_cap_addr(oct_hw, &cap); + break; + case VIRTIO_PCI_CAP_NOTIFY_CFG: + octep_pci_caps_read(oct_hw, &oct_hw->notify_off_multiplier, + 4, pos + sizeof(cap)); + + oct_hw->notify_base = octep_get_cap_addr(oct_hw, &cap); + oct_hw->notify_bar = cap.bar; + oct_hw->notify_base_pa = pci_resource_start(pdev, cap.bar) + + le32_to_cpu(cap.offset); + break; + case VIRTIO_PCI_CAP_DEVICE_CFG: + oct_hw->dev_cfg = octep_get_cap_addr(oct_hw, &cap); + break; + case VIRTIO_PCI_CAP_ISR_CFG: + oct_hw->isr = octep_get_cap_addr(oct_hw, &cap); + break; + case VIRTIO_PCI_CAP_VENDOR_CFG: + octep_pci_caps_read(oct_hw, &vndr_data, sizeof(vndr_data), pos); + if (vndr_data.hdr.vendor_id != PCI_VENDOR_ID_CAVIUM) { + dev_err(dev, "Invalid vendor data\n"); + return -EINVAL; + } + + octep_vndr_data_process(oct_hw, &vndr_data); + break; + } + + pos = 
cap.cap_next; + } + if (!oct_hw->common_cfg || !oct_hw->notify_base || + !oct_hw->dev_cfg || !oct_hw->isr) { + dev_err(dev, "Incomplete PCI capabilities"); + return -EIO; + } + dev_info(dev, "common cfg mapped at: %p\n", oct_hw->common_cfg); + dev_info(dev, "device cfg mapped at: %p\n", oct_hw->dev_cfg); + dev_info(dev, "isr cfg mapped at: %p\n", oct_hw->isr); + dev_info(dev, "notify base: %p, notify off multiplier: %u\n", + oct_hw->notify_base, oct_hw->notify_off_multiplier); + + oct_hw->config_size = octep_get_config_size(oct_hw); + oct_hw->features = octep_hw_get_dev_features(oct_hw); + + ret = octep_verify_features(oct_hw->features); + if (ret) { + dev_err(&pdev->dev, "Couldn't read features from the device FW\n"); + return ret; + } + oct_hw->nr_vring = vp_ioread16(&oct_hw->common_cfg->num_queues); + + oct_hw->vqs = devm_kcalloc(&pdev->dev, oct_hw->nr_vring, sizeof(*oct_hw->vqs), GFP_KERNEL); + if (!oct_hw->vqs) + return -ENOMEM; + + dev_info(&pdev->dev, "Device features : %llx\n", oct_hw->features); + dev_info(&pdev->dev, "Maximum queues : %u\n", oct_hw->nr_vring); + + for (i = 0; i < oct_hw->nr_vring; i++) { + octep_write_queue_select(oct_hw, i); + notify_off = vp_ioread16(&oct_hw->common_cfg->queue_notify_off); + oct_hw->vqs[i].notify_addr = oct_hw->notify_base + + notify_off * oct_hw->notify_off_multiplier; + oct_hw->vqs[i].cb_notify_addr = (u32 __iomem *)oct_hw->vqs[i].notify_addr + 1; + oct_hw->vqs[i].notify_pa = oct_hw->notify_base_pa + + notify_off * oct_hw->notify_off_multiplier; + } + mbox = octep_get_mbox(oct_hw); + octep_mbox_init(mbox); + dev_info(dev, "mbox mapped at: %p\n", mbox); + + return 0; +} diff --git a/drivers/vdpa/octeon_ep/octep_vdpa_main.c b/drivers/vdpa/octeon_ep/octep_vdpa_main.c new file mode 100644 index 000000000000..f3d4dda4e04c --- /dev/null +++ b/drivers/vdpa/octeon_ep/octep_vdpa_main.c @@ -0,0 +1,892 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2024 Marvell. */ + +#include <linux/interrupt.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/module.h> +#include <linux/iommu.h> +#include "octep_vdpa.h" + +#define OCTEP_VDPA_DRIVER_NAME "octep_vdpa" + +struct octep_pf { + u8 __iomem *base[PCI_STD_NUM_BARS]; + struct pci_dev *pdev; + struct resource res; + u64 vf_base; + int enabled_vfs; + u32 vf_stride; + u16 vf_devid; +}; + +struct octep_vdpa { + struct vdpa_device vdpa; + struct octep_hw *oct_hw; + struct pci_dev *pdev; +}; + +struct octep_vdpa_mgmt_dev { + struct vdpa_mgmt_dev mdev; + struct octep_hw oct_hw; + struct pci_dev *pdev; + /* Work entry to handle device setup */ + struct work_struct setup_task; + /* Device status */ + atomic_t status; +}; + +static struct octep_hw *vdpa_to_octep_hw(struct vdpa_device *vdpa_dev) +{ + struct octep_vdpa *oct_vdpa; + + oct_vdpa = container_of(vdpa_dev, struct octep_vdpa, vdpa); + + return oct_vdpa->oct_hw; +} + +static irqreturn_t octep_vdpa_intr_handler(int irq, void *data) +{ + struct octep_hw *oct_hw = data; + int i; + + /* Each device has multiple interrupts (nb_irqs) shared among rings + * (nr_vring). Device interrupts are mapped to the rings in a + * round-robin fashion. + * + * For example, if nb_irqs = 8 and nr_vring = 64: + * 0 -> 0, 8, 16, 24, 32, 40, 48, 56; + * 1 -> 1, 9, 17, 25, 33, 41, 49, 57; + * ... 
+ * 7 -> 7, 15, 23, 31, 39, 47, 55, 63; + */ + + for (i = irq - oct_hw->irqs[0]; i < oct_hw->nr_vring; i += oct_hw->nb_irqs) { + if (ioread8(oct_hw->vqs[i].cb_notify_addr)) { + /* Acknowledge the per ring notification to the device */ + iowrite8(0, oct_hw->vqs[i].cb_notify_addr); + + if (likely(oct_hw->vqs[i].cb.callback)) + oct_hw->vqs[i].cb.callback(oct_hw->vqs[i].cb.private); + break; + } + } + + /* Check for config interrupt. Config uses the first interrupt */ + if (unlikely(irq == oct_hw->irqs[0] && ioread8(oct_hw->isr))) { + iowrite8(0, oct_hw->isr); + + if (oct_hw->config_cb.callback) + oct_hw->config_cb.callback(oct_hw->config_cb.private); + } + + return IRQ_HANDLED; +} + +static void octep_free_irqs(struct octep_hw *oct_hw) +{ + struct pci_dev *pdev = oct_hw->pdev; + int irq; + + if (!oct_hw->irqs) + return; + + for (irq = 0; irq < oct_hw->nb_irqs; irq++) { + if (!oct_hw->irqs[irq]) + break; + + devm_free_irq(&pdev->dev, oct_hw->irqs[irq], oct_hw); + } + + pci_free_irq_vectors(pdev); + devm_kfree(&pdev->dev, oct_hw->irqs); + oct_hw->irqs = NULL; +} + +static int octep_request_irqs(struct octep_hw *oct_hw) +{ + struct pci_dev *pdev = oct_hw->pdev; + int ret, irq, idx; + + oct_hw->irqs = devm_kcalloc(&pdev->dev, oct_hw->nb_irqs, sizeof(int), GFP_KERNEL); + if (!oct_hw->irqs) + return -ENOMEM; + + ret = pci_alloc_irq_vectors(pdev, 1, oct_hw->nb_irqs, PCI_IRQ_MSIX); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to alloc msix vector"); + return ret; + } + + for (idx = 0; idx < oct_hw->nb_irqs; idx++) { + irq = pci_irq_vector(pdev, idx); + ret = devm_request_irq(&pdev->dev, irq, octep_vdpa_intr_handler, 0, + dev_name(&pdev->dev), oct_hw); + if (ret) { + dev_err(&pdev->dev, "Failed to register interrupt handler\n"); + goto free_irqs; + } + oct_hw->irqs[idx] = irq; + } + + return 0; + +free_irqs: + octep_free_irqs(oct_hw); + return ret; +} + +static u64 octep_vdpa_get_device_features(struct vdpa_device *vdpa_dev) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + + return oct_hw->features; +} + +static int octep_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 features) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + int ret; + + pr_debug("Driver Features: %llx\n", features); + + ret = octep_verify_features(features); + if (ret) { + dev_warn(&oct_hw->pdev->dev, + "Must negotiate minimum features 0x%llx for this device", + BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_NOTIFICATION_DATA) | + BIT_ULL(VIRTIO_F_RING_PACKED)); + return ret; + } + octep_hw_set_drv_features(oct_hw, features); + + return 0; +} + +static u64 octep_vdpa_get_driver_features(struct vdpa_device *vdpa_dev) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + + return octep_hw_get_drv_features(oct_hw); +} + +static u8 octep_vdpa_get_status(struct vdpa_device *vdpa_dev) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + + return octep_hw_get_status(oct_hw); +} + +static void octep_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + u8 status_old; + + status_old = octep_hw_get_status(oct_hw); + + if (status_old == status) + return; + + if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && + !(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) { + if (octep_request_irqs(oct_hw)) + status = status_old | VIRTIO_CONFIG_S_FAILED; + } + octep_hw_set_status(oct_hw, status); +} + +static int octep_vdpa_reset(struct vdpa_device *vdpa_dev) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + u8 status = 
octep_hw_get_status(oct_hw); + u16 qid; + + if (status == 0) + return 0; + + for (qid = 0; qid < oct_hw->nr_vring; qid++) { + oct_hw->vqs[qid].cb.callback = NULL; + oct_hw->vqs[qid].cb.private = NULL; + oct_hw->config_cb.callback = NULL; + oct_hw->config_cb.private = NULL; + } + octep_hw_reset(oct_hw); + + if (status & VIRTIO_CONFIG_S_DRIVER_OK) + octep_free_irqs(oct_hw); + + return 0; +} + +static u16 octep_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + + return octep_get_vq_size(oct_hw); +} + +static int octep_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid, + struct vdpa_vq_state *state) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + + return octep_get_vq_state(oct_hw, qid, state); +} + +static int octep_vdpa_set_vq_state(struct vdpa_device *vdpa_dev, u16 qid, + const struct vdpa_vq_state *state) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + + return octep_set_vq_state(oct_hw, qid, state); +} + +static void octep_vdpa_set_vq_cb(struct vdpa_device *vdpa_dev, u16 qid, struct vdpa_callback *cb) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + + oct_hw->vqs[qid].cb = *cb; +} + +static void octep_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool ready) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + + octep_set_vq_ready(oct_hw, qid, ready); +} + +static bool octep_vdpa_get_vq_ready(struct vdpa_device *vdpa_dev, u16 qid) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + + return octep_get_vq_ready(oct_hw, qid); +} + +static void octep_vdpa_set_vq_num(struct vdpa_device *vdpa_dev, u16 qid, u32 num) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + + octep_set_vq_num(oct_hw, qid, num); +} + +static int octep_vdpa_set_vq_address(struct vdpa_device *vdpa_dev, u16 qid, u64 desc_area, + u64 driver_area, u64 device_area) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + + pr_debug("qid[%d]: desc_area: %llx\n", qid, desc_area); + pr_debug("qid[%d]: driver_area: %llx\n", qid, driver_area); + pr_debug("qid[%d]: device_area: %llx\n\n", qid, device_area); + + return octep_set_vq_address(oct_hw, qid, desc_area, driver_area, device_area); +} + +static void octep_vdpa_kick_vq(struct vdpa_device *vdpa_dev, u16 qid) +{ + /* Not supported */ +} + +static void octep_vdpa_kick_vq_with_data(struct vdpa_device *vdpa_dev, u32 data) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + u16 idx = data & 0xFFFF; + + vp_iowrite32(data, oct_hw->vqs[idx].notify_addr); +} + +static u32 octep_vdpa_get_generation(struct vdpa_device *vdpa_dev) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + + return vp_ioread8(&oct_hw->common_cfg->config_generation); +} + +static u32 octep_vdpa_get_device_id(struct vdpa_device *vdpa_dev) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + + return oct_hw->dev_id; +} + +static u32 octep_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev) +{ + return PCI_VENDOR_ID_CAVIUM; +} + +static u32 octep_vdpa_get_vq_align(struct vdpa_device *vdpa_dev) +{ + return PAGE_SIZE; +} + +static size_t octep_vdpa_get_config_size(struct vdpa_device *vdpa_dev) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + + return oct_hw->config_size; +} + +static void octep_vdpa_get_config(struct vdpa_device *vdpa_dev, unsigned int offset, void *buf, + unsigned int len) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + + octep_read_dev_config(oct_hw, offset, buf, len); +} + +static void 
octep_vdpa_set_config(struct vdpa_device *vdpa_dev, unsigned int offset, + const void *buf, unsigned int len) +{ + /* Not supported */ +} + +static void octep_vdpa_set_config_cb(struct vdpa_device *vdpa_dev, struct vdpa_callback *cb) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + + oct_hw->config_cb.callback = cb->callback; + oct_hw->config_cb.private = cb->private; +} + +static struct vdpa_notification_area octep_get_vq_notification(struct vdpa_device *vdpa_dev, + u16 idx) +{ + struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev); + struct vdpa_notification_area area; + + area.addr = oct_hw->vqs[idx].notify_pa; + area.size = PAGE_SIZE; + + return area; +} + +static struct vdpa_config_ops octep_vdpa_ops = { + .get_device_features = octep_vdpa_get_device_features, + .set_driver_features = octep_vdpa_set_driver_features, + .get_driver_features = octep_vdpa_get_driver_features, + .get_status = octep_vdpa_get_status, + .set_status = octep_vdpa_set_status, + .reset = octep_vdpa_reset, + .get_vq_num_max = octep_vdpa_get_vq_num_max, + .get_vq_state = octep_vdpa_get_vq_state, + .set_vq_state = octep_vdpa_set_vq_state, + .set_vq_cb = octep_vdpa_set_vq_cb, + .set_vq_ready = octep_vdpa_set_vq_ready, + .get_vq_ready = octep_vdpa_get_vq_ready, + .set_vq_num = octep_vdpa_set_vq_num, + .set_vq_address = octep_vdpa_set_vq_address, + .get_vq_irq = NULL, + .kick_vq = octep_vdpa_kick_vq, + .kick_vq_with_data = octep_vdpa_kick_vq_with_data, + .get_generation = octep_vdpa_get_generation, + .get_device_id = octep_vdpa_get_device_id, + .get_vendor_id = octep_vdpa_get_vendor_id, + .get_vq_align = octep_vdpa_get_vq_align, + .get_config_size = octep_vdpa_get_config_size, + .get_config = octep_vdpa_get_config, + .set_config = octep_vdpa_set_config, + .set_config_cb = octep_vdpa_set_config_cb, + .get_vq_notification = octep_get_vq_notification, +}; + +static int octep_iomap_region(struct pci_dev *pdev, u8 __iomem **tbl, u8 bar) +{ + int ret; + + ret = pci_request_region(pdev, bar, OCTEP_VDPA_DRIVER_NAME); + if (ret) { + dev_err(&pdev->dev, "Failed to request BAR:%u region\n", bar); + return ret; + } + + tbl[bar] = pci_iomap(pdev, bar, pci_resource_len(pdev, bar)); + if (!tbl[bar]) { + dev_err(&pdev->dev, "Failed to iomap BAR:%u\n", bar); + pci_release_region(pdev, bar); + ret = -ENOMEM; + } + + return ret; +} + +static void octep_iounmap_region(struct pci_dev *pdev, u8 __iomem **tbl, u8 bar) +{ + pci_iounmap(pdev, tbl[bar]); + pci_release_region(pdev, bar); +} + +static void octep_vdpa_pf_bar_shrink(struct octep_pf *octpf) +{ + struct pci_dev *pf_dev = octpf->pdev; + struct resource *res = pf_dev->resource + PCI_STD_RESOURCES + 4; + struct pci_bus_region bus_region; + + octpf->res.start = res->start; + octpf->res.end = res->end; + octpf->vf_base = res->start; + + bus_region.start = res->start; + bus_region.end = res->start - 1; + + pcibios_bus_to_resource(pf_dev->bus, res, &bus_region); +} + +static void octep_vdpa_pf_bar_expand(struct octep_pf *octpf) +{ + struct pci_dev *pf_dev = octpf->pdev; + struct resource *res = pf_dev->resource + PCI_STD_RESOURCES + 4; + struct pci_bus_region bus_region; + + bus_region.start = octpf->res.start; + bus_region.end = octpf->res.end; + + pcibios_bus_to_resource(pf_dev->bus, res, &bus_region); +} + +static void octep_vdpa_remove_pf(struct pci_dev *pdev) +{ + struct octep_pf *octpf = pci_get_drvdata(pdev); + + pci_disable_sriov(pdev); + + if (octpf->base[OCTEP_HW_CAPS_BAR]) + octep_iounmap_region(pdev, octpf->base, OCTEP_HW_CAPS_BAR); + + if 
(octpf->base[OCTEP_HW_MBOX_BAR]) + octep_iounmap_region(pdev, octpf->base, OCTEP_HW_MBOX_BAR); + + octep_vdpa_pf_bar_expand(octpf); +} + +static void octep_vdpa_vf_bar_shrink(struct pci_dev *pdev) +{ + struct resource *vf_res = pdev->resource + PCI_STD_RESOURCES + 4; + + memset(vf_res, 0, sizeof(*vf_res)); +} + +static void octep_vdpa_remove_vf(struct pci_dev *pdev) +{ + struct octep_vdpa_mgmt_dev *mgmt_dev = pci_get_drvdata(pdev); + struct octep_hw *oct_hw; + int status; + + oct_hw = &mgmt_dev->oct_hw; + status = atomic_read(&mgmt_dev->status); + atomic_set(&mgmt_dev->status, OCTEP_VDPA_DEV_STATUS_UNINIT); + + cancel_work_sync(&mgmt_dev->setup_task); + if (status == OCTEP_VDPA_DEV_STATUS_READY) + vdpa_mgmtdev_unregister(&mgmt_dev->mdev); + + if (oct_hw->base[OCTEP_HW_CAPS_BAR]) + octep_iounmap_region(pdev, oct_hw->base, OCTEP_HW_CAPS_BAR); + + if (oct_hw->base[OCTEP_HW_MBOX_BAR]) + octep_iounmap_region(pdev, oct_hw->base, OCTEP_HW_MBOX_BAR); + + octep_vdpa_vf_bar_shrink(pdev); +} + +static void octep_vdpa_remove(struct pci_dev *pdev) +{ + if (pdev->is_virtfn) + octep_vdpa_remove_vf(pdev); + else + octep_vdpa_remove_pf(pdev); +} + +static int octep_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, + const struct vdpa_dev_set_config *config) +{ + struct octep_vdpa_mgmt_dev *mgmt_dev = container_of(mdev, struct octep_vdpa_mgmt_dev, mdev); + struct octep_hw *oct_hw = &mgmt_dev->oct_hw; + struct pci_dev *pdev = oct_hw->pdev; + struct vdpa_device *vdpa_dev; + struct octep_vdpa *oct_vdpa; + u64 device_features; + int ret; + + oct_vdpa = vdpa_alloc_device(struct octep_vdpa, vdpa, &pdev->dev, &octep_vdpa_ops, 1, 1, + NULL, false); + if (IS_ERR(oct_vdpa)) { + dev_err(&pdev->dev, "Failed to allocate vDPA structure for octep vdpa device"); + return PTR_ERR(oct_vdpa); + } + + oct_vdpa->pdev = pdev; + oct_vdpa->vdpa.dma_dev = &pdev->dev; + oct_vdpa->vdpa.mdev = mdev; + oct_vdpa->oct_hw = oct_hw; + vdpa_dev = &oct_vdpa->vdpa; + + device_features = oct_hw->features; + if (config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) { + if (config->device_features & ~device_features) { + dev_err(&pdev->dev, "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n", + config->device_features, device_features); + ret = -EINVAL; + goto vdpa_dev_put; + } + device_features &= config->device_features; + } + + oct_hw->features = device_features; + dev_info(&pdev->dev, "Vdpa management device features : %llx\n", device_features); + + ret = octep_verify_features(device_features); + if (ret) { + dev_warn(mdev->device, + "Must provision minimum features 0x%llx for this device", + BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | + BIT_ULL(VIRTIO_F_NOTIFICATION_DATA) | BIT_ULL(VIRTIO_F_RING_PACKED)); + goto vdpa_dev_put; + } + if (name) + ret = dev_set_name(&vdpa_dev->dev, "%s", name); + else + ret = dev_set_name(&vdpa_dev->dev, "vdpa%u", vdpa_dev->index); + + ret = _vdpa_register_device(&oct_vdpa->vdpa, oct_hw->nr_vring); + if (ret) { + dev_err(&pdev->dev, "Failed to register to vDPA bus"); + goto vdpa_dev_put; + } + return 0; + +vdpa_dev_put: + put_device(&oct_vdpa->vdpa.dev); + return ret; +} + +static void octep_vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *vdpa_dev) +{ + _vdpa_unregister_device(vdpa_dev); +} + +static const struct vdpa_mgmtdev_ops octep_vdpa_mgmt_dev_ops = { + .dev_add = octep_vdpa_dev_add, + .dev_del = octep_vdpa_dev_del +}; + +static bool get_device_ready_status(u8 __iomem *addr) +{ + u64 signature = readq(addr + 
OCTEP_VF_MBOX_DATA(0)); + + if (signature == OCTEP_DEV_READY_SIGNATURE) { + writeq(0, addr + OCTEP_VF_MBOX_DATA(0)); + return true; + } + + return false; +} + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +static void octep_vdpa_setup_task(struct work_struct *work) +{ + struct octep_vdpa_mgmt_dev *mgmt_dev = container_of(work, struct octep_vdpa_mgmt_dev, + setup_task); + struct pci_dev *pdev = mgmt_dev->pdev; + struct device *dev = &pdev->dev; + struct octep_hw *oct_hw; + unsigned long timeout; + u64 val; + int ret; + + oct_hw = &mgmt_dev->oct_hw; + + atomic_set(&mgmt_dev->status, OCTEP_VDPA_DEV_STATUS_WAIT_FOR_BAR_INIT); + + /* Wait for a maximum of 5 sec */ + timeout = jiffies + msecs_to_jiffies(5000); + while (!time_after(jiffies, timeout)) { + if (get_device_ready_status(oct_hw->base[OCTEP_HW_MBOX_BAR])) { + atomic_set(&mgmt_dev->status, OCTEP_VDPA_DEV_STATUS_INIT); + break; + } + + if (atomic_read(&mgmt_dev->status) >= OCTEP_VDPA_DEV_STATUS_READY) { + dev_info(dev, "Stopping vDPA setup task.\n"); + return; + } + + usleep_range(1000, 1500); + } + + if (atomic_read(&mgmt_dev->status) != OCTEP_VDPA_DEV_STATUS_INIT) { + dev_err(dev, "BAR initialization is timed out\n"); + return; + } + + ret = octep_iomap_region(pdev, oct_hw->base, OCTEP_HW_CAPS_BAR); + if (ret) + return; + + val = readq(oct_hw->base[OCTEP_HW_MBOX_BAR] + OCTEP_VF_IN_CTRL(0)); + oct_hw->nb_irqs = OCTEP_VF_IN_CTRL_RPVF(val); + if (!oct_hw->nb_irqs || oct_hw->nb_irqs > OCTEP_MAX_CB_INTR) { + dev_err(dev, "Invalid number of interrupts %d\n", oct_hw->nb_irqs); + goto unmap_region; + } + + ret = octep_hw_caps_read(oct_hw, pdev); + if (ret < 0) + goto unmap_region; + + mgmt_dev->mdev.ops = &octep_vdpa_mgmt_dev_ops; + mgmt_dev->mdev.id_table = id_table; + mgmt_dev->mdev.max_supported_vqs = oct_hw->nr_vring; + mgmt_dev->mdev.supported_features = oct_hw->features; + mgmt_dev->mdev.config_attr_mask = (1 << VDPA_ATTR_DEV_FEATURES); + mgmt_dev->mdev.device = dev; + + ret = vdpa_mgmtdev_register(&mgmt_dev->mdev); + if (ret) { + dev_err(dev, "Failed to register vdpa management interface\n"); + goto unmap_region; + } + + atomic_set(&mgmt_dev->status, OCTEP_VDPA_DEV_STATUS_READY); + + return; + +unmap_region: + octep_iounmap_region(pdev, oct_hw->base, OCTEP_HW_CAPS_BAR); + oct_hw->base[OCTEP_HW_CAPS_BAR] = NULL; +} + +static int octep_vdpa_probe_vf(struct pci_dev *pdev) +{ + struct octep_vdpa_mgmt_dev *mgmt_dev; + struct device *dev = &pdev->dev; + int ret; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(dev, "Failed to enable device\n"); + return ret; + } + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); + if (ret) { + dev_err(dev, "No usable DMA configuration\n"); + return ret; + } + pci_set_master(pdev); + + mgmt_dev = devm_kzalloc(dev, sizeof(struct octep_vdpa_mgmt_dev), GFP_KERNEL); + if (!mgmt_dev) + return -ENOMEM; + + ret = octep_iomap_region(pdev, mgmt_dev->oct_hw.base, OCTEP_HW_MBOX_BAR); + if (ret) + return ret; + + mgmt_dev->pdev = pdev; + pci_set_drvdata(pdev, mgmt_dev); + + atomic_set(&mgmt_dev->status, OCTEP_VDPA_DEV_STATUS_ALLOC); + INIT_WORK(&mgmt_dev->setup_task, octep_vdpa_setup_task); + schedule_work(&mgmt_dev->setup_task); + dev_info(&pdev->dev, "octep vdpa mgmt device setup task is queued\n"); + + return 0; +} + +static void octep_vdpa_assign_barspace(struct pci_dev *vf_dev, struct pci_dev *pf_dev, u8 idx) +{ + struct resource *vf_res = vf_dev->resource + PCI_STD_RESOURCES + 4; + struct resource *pf_res = pf_dev->resource + PCI_STD_RESOURCES 
+ 4; + struct octep_pf *pf = pci_get_drvdata(pf_dev); + struct pci_bus_region bus_region; + + vf_res->name = pci_name(vf_dev); + vf_res->flags = pf_res->flags; + vf_res->parent = (pf_dev->resource + PCI_STD_RESOURCES)->parent; + + bus_region.start = pf->vf_base + idx * pf->vf_stride; + bus_region.end = bus_region.start + pf->vf_stride - 1; + pcibios_bus_to_resource(vf_dev->bus, vf_res, &bus_region); +} + +static int octep_sriov_enable(struct pci_dev *pdev, int num_vfs) +{ + struct octep_pf *pf = pci_get_drvdata(pdev); + u8 __iomem *addr = pf->base[OCTEP_HW_MBOX_BAR]; + struct pci_dev *vf_pdev = NULL; + bool done = false; + int index = 0; + int ret, i; + + ret = pci_enable_sriov(pdev, num_vfs); + if (ret) + return ret; + + pf->enabled_vfs = num_vfs; + + while ((vf_pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, PCI_ANY_ID, vf_pdev))) { + if (vf_pdev->device != pf->vf_devid) + continue; + + octep_vdpa_assign_barspace(vf_pdev, pdev, index); + if (++index == num_vfs) { + done = true; + break; + } + } + + if (done) { + for (i = 0; i < pf->enabled_vfs; i++) + writeq(OCTEP_DEV_READY_SIGNATURE, addr + OCTEP_PF_MBOX_DATA(i)); + } + + return num_vfs; +} + +static int octep_sriov_disable(struct pci_dev *pdev) +{ + struct octep_pf *pf = pci_get_drvdata(pdev); + + if (!pci_num_vf(pdev)) + return 0; + + pci_disable_sriov(pdev); + pf->enabled_vfs = 0; + + return 0; +} + +static int octep_vdpa_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + if (num_vfs > 0) + return octep_sriov_enable(pdev, num_vfs); + else + return octep_sriov_disable(pdev); +} + +static u16 octep_get_vf_devid(struct pci_dev *pdev) +{ + u16 did; + + switch (pdev->device) { + case OCTEP_VDPA_DEVID_CN106K_PF: + did = OCTEP_VDPA_DEVID_CN106K_VF; + break; + case OCTEP_VDPA_DEVID_CN105K_PF: + did = OCTEP_VDPA_DEVID_CN105K_VF; + break; + case OCTEP_VDPA_DEVID_CN103K_PF: + did = OCTEP_VDPA_DEVID_CN103K_VF; + break; + default: + did = 0xFFFF; + break; + } + + return did; +} + +static int octep_vdpa_pf_setup(struct octep_pf *octpf) +{ + u8 __iomem *addr = octpf->base[OCTEP_HW_MBOX_BAR]; + struct pci_dev *pdev = octpf->pdev; + int totalvfs; + size_t len; + u64 val; + + totalvfs = pci_sriov_get_totalvfs(pdev); + if (unlikely(!totalvfs)) { + dev_info(&pdev->dev, "Total VFs are %d in PF sriov configuration\n", totalvfs); + return 0; + } + + addr = octpf->base[OCTEP_HW_MBOX_BAR]; + val = readq(addr + OCTEP_EPF_RINFO(0)); + if (val == 0) { + dev_err(&pdev->dev, "Invalid device configuration\n"); + return -EINVAL; + } + + len = pci_resource_len(pdev, OCTEP_HW_CAPS_BAR); + + octpf->vf_stride = len / totalvfs; + octpf->vf_devid = octep_get_vf_devid(pdev); + + octep_vdpa_pf_bar_shrink(octpf); + + return 0; +} + +static int octep_vdpa_probe_pf(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct octep_pf *octpf; + int ret; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(dev, "Failed to enable device\n"); + return ret; + } + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); + if (ret) { + dev_err(dev, "No usable DMA configuration\n"); + return ret; + } + octpf = devm_kzalloc(dev, sizeof(*octpf), GFP_KERNEL); + if (!octpf) + return -ENOMEM; + + ret = octep_iomap_region(pdev, octpf->base, OCTEP_HW_MBOX_BAR); + if (ret) + return ret; + + pci_set_master(pdev); + pci_set_drvdata(pdev, octpf); + octpf->pdev = pdev; + + ret = octep_vdpa_pf_setup(octpf); + if (ret) + goto unmap_region; + + return 0; + +unmap_region: + octep_iounmap_region(pdev, octpf->base, OCTEP_HW_MBOX_BAR); + return ret; +} + +static int 
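
The PF setup above carves the PF's capability BAR into equal per-VF windows: octep_vdpa_pf_setup() computes vf_stride as the BAR length divided by totalvfs, and octep_vdpa_assign_barspace() hands VF idx the window starting at vf_base + idx * vf_stride. With made-up numbers (editorial sketch, values assumed, not from the patch):

/*
 * Sketch with assumed numbers: if pci_resource_len(pdev,
 * OCTEP_HW_CAPS_BAR) is 64 MiB and the PF exposes totalvfs = 8, then
 *
 *   vf_stride = 64 MiB / 8 = 8 MiB
 *   VF 0 window: [vf_base +  0 MiB, vf_base +  8 MiB - 1]
 *   VF 3 window: [vf_base + 24 MiB, vf_base + 32 MiB - 1]
 *
 * octep_vdpa_pf_bar_shrink() leaves the PF resource empty
 * (end = start - 1) while remembering the original range in
 * octpf->res, which octep_vdpa_pf_bar_expand() restores on remove.
 */
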
octep_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + if (pdev->is_virtfn) + return octep_vdpa_probe_vf(pdev); + else + return octep_vdpa_probe_pf(pdev); +} + +static struct pci_device_id octep_pci_vdpa_map[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_VDPA_DEVID_CN106K_PF) }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_VDPA_DEVID_CN106K_VF) }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_VDPA_DEVID_CN105K_PF) }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_VDPA_DEVID_CN105K_VF) }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_VDPA_DEVID_CN103K_PF) }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_VDPA_DEVID_CN103K_VF) }, + { 0 }, +}; + +static struct pci_driver octep_pci_vdpa = { + .name = OCTEP_VDPA_DRIVER_NAME, + .id_table = octep_pci_vdpa_map, + .probe = octep_vdpa_probe, + .remove = octep_vdpa_remove, + .sriov_configure = octep_vdpa_sriov_configure +}; + +module_pci_driver(octep_pci_vdpa); + +MODULE_AUTHOR("Marvell"); +MODULE_DESCRIPTION("Marvell Octeon PCIe endpoint vDPA driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/vdpa/pds/aux_drv.c b/drivers/vdpa/pds/aux_drv.c index 186e9ee22eb1..f57330cf9024 100644 --- a/drivers/vdpa/pds/aux_drv.c +++ b/drivers/vdpa/pds/aux_drv.c @@ -93,8 +93,8 @@ static void pds_vdpa_remove(struct auxiliary_device *aux_dev) struct device *dev = &aux_dev->dev; vdpa_mgmtdev_unregister(&vdpa_aux->vdpa_mdev); + pds_vdpa_release_irqs(vdpa_aux->pdsv); vp_modern_remove(&vdpa_aux->vd_mdev); - pci_free_irq_vectors(vdpa_aux->padev->vf_pdev); pds_vdpa_debugfs_del_vdpadev(vdpa_aux); kfree(vdpa_aux); diff --git a/drivers/vdpa/pds/cmds.h b/drivers/vdpa/pds/cmds.h index e24d85cb8f1c..6b1bc33356b0 100644 --- a/drivers/vdpa/pds/cmds.h +++ b/drivers/vdpa/pds/cmds.h @@ -14,5 +14,4 @@ int pds_vdpa_cmd_init_vq(struct pds_vdpa_device *pdsv, u16 qid, u16 invert_idx, struct pds_vdpa_vq_info *vq_info); int pds_vdpa_cmd_reset_vq(struct pds_vdpa_device *pdsv, u16 qid, u16 invert_idx, struct pds_vdpa_vq_info *vq_info); -int pds_vdpa_cmd_set_features(struct pds_vdpa_device *pdsv, u64 features); #endif /* _VDPA_CMDS_H_ */ diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c index 25c0fe5ec3d5..301d95e08596 100644 --- a/drivers/vdpa/pds/vdpa_dev.c +++ b/drivers/vdpa/pds/vdpa_dev.c @@ -426,12 +426,18 @@ err_release: return err; } -static void pds_vdpa_release_irqs(struct pds_vdpa_device *pdsv) +void pds_vdpa_release_irqs(struct pds_vdpa_device *pdsv) { - struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev; - struct pds_vdpa_aux *vdpa_aux = pdsv->vdpa_aux; + struct pds_vdpa_aux *vdpa_aux; + struct pci_dev *pdev; int qid; + if (!pdsv) + return; + + pdev = pdsv->vdpa_aux->padev->vf_pdev; + vdpa_aux = pdsv->vdpa_aux; + if (!vdpa_aux->nintrs) return; @@ -612,6 +618,7 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, struct device *dma_dev; struct pci_dev *pdev; struct device *dev; + u8 status; int err; int i; @@ -638,6 +645,13 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, dma_dev = &pdev->dev; pdsv->vdpa_dev.dma_dev = dma_dev; + status = pds_vdpa_get_status(&pdsv->vdpa_dev); + if (status == 0xff) { + dev_err(dev, "Broken PCI - status %#x\n", status); + err = -ENXIO; + goto err_unmap; + } + pdsv->supported_features = mgmt->supported_features; if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) { diff --git a/drivers/vdpa/pds/vdpa_dev.h b/drivers/vdpa/pds/vdpa_dev.h index d984ba24a7da..84bdb45871ff 100644 --- a/drivers/vdpa/pds/vdpa_dev.h +++ b/drivers/vdpa/pds/vdpa_dev.h @@ -46,5 +46,6 @@ struct 
pds_vdpa_device { #define PDS_VDPA_PACKED_INVERT_IDX 0x8000 +void pds_vdpa_release_irqs(struct pds_vdpa_device *pdsv); int pds_vdpa_get_mgmt_info(struct pds_vdpa_aux *vdpa_aux); #endif /* _VDPA_DEV_H_ */ diff --git a/drivers/vdpa/solidrun/snet_main.c b/drivers/vdpa/solidrun/snet_main.c index 99428a04068d..55ec51c17ab3 100644 --- a/drivers/vdpa/solidrun/snet_main.c +++ b/drivers/vdpa/solidrun/snet_main.c @@ -555,53 +555,61 @@ static const struct vdpa_config_ops snet_config_ops = { static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet) { - char name[50]; - int ret, i, mask = 0; + char *name; + unsigned short i; + bool bars_found = false; + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "psnet[%s]-bars", pci_name(pdev)); + if (!name) + return -ENOMEM; + /* We don't know which BAR will be used to communicate.. * We will map every bar with len > 0. * * Later, we will discover the BAR and unmap all other BARs. */ for (i = 0; i < PCI_STD_NUM_BARS; i++) { - if (pci_resource_len(pdev, i)) - mask |= (1 << i); + void __iomem *io; + + if (pci_resource_len(pdev, i) == 0) + continue; + + io = pcim_iomap_region(pdev, i, name); + if (IS_ERR(io)) { + SNET_ERR(pdev, "Failed to request and map PCI BARs\n"); + return PTR_ERR(io); + } + + psnet->bars[i] = io; + bars_found = true; } /* No BAR can be used.. */ - if (!mask) { + if (!bars_found) { SNET_ERR(pdev, "Failed to find a PCI BAR\n"); return -ENODEV; } - snprintf(name, sizeof(name), "psnet[%s]-bars", pci_name(pdev)); - ret = pcim_iomap_regions(pdev, mask, name); - if (ret) { - SNET_ERR(pdev, "Failed to request and map PCI BARs\n"); - return ret; - } - - for (i = 0; i < PCI_STD_NUM_BARS; i++) { - if (mask & (1 << i)) - psnet->bars[i] = pcim_iomap_table(pdev)[i]; - } - return 0; } static int snet_open_vf_bar(struct pci_dev *pdev, struct snet *snet) { - char name[50]; - int ret; + char *name; + void __iomem *io; + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "snet[%s]-bars", pci_name(pdev)); + if (!name) + return -ENOMEM; - snprintf(name, sizeof(name), "snet[%s]-bar", pci_name(pdev)); /* Request and map BAR */ - ret = pcim_iomap_regions(pdev, BIT(snet->psnet->cfg.vf_bar), name); - if (ret) { + io = pcim_iomap_region(pdev, snet->psnet->cfg.vf_bar, name); + if (IS_ERR(io)) { SNET_ERR(pdev, "Failed to request and map PCI BAR for a VF\n"); - return ret; + return PTR_ERR(io); } - snet->bar = pcim_iomap_table(pdev)[snet->psnet->cfg.vf_bar]; + snet->bar = io; return 0; } @@ -650,15 +658,12 @@ static int psnet_detect_bar(struct psnet *psnet, u32 off) static void psnet_unmap_unused_bars(struct pci_dev *pdev, struct psnet *psnet) { - int i, mask = 0; + unsigned short i; for (i = 0; i < PCI_STD_NUM_BARS; i++) { if (psnet->bars[i] && i != psnet->barno) - mask |= (1 << i); + pcim_iounmap_region(pdev, i); } - - if (mask) - pcim_iounmap_regions(pdev, mask); } /* Read SNET config from PCI BAR */ diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index d0695680b282..8a372b51c21a 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -65,7 +65,7 @@ static void vdpa_dev_remove(struct device *d) drv->remove(vdev); } -static int vdpa_dev_match(struct device *dev, struct device_driver *drv) +static int vdpa_dev_match(struct device *dev, const struct device_driver *drv) { struct vdpa_device *vdev = dev_to_vdpa(dev); @@ -98,7 +98,7 @@ static ssize_t driver_override_show(struct device *dev, ssize_t len; device_lock(dev); - len = snprintf(buf, PAGE_SIZE, "%s\n", vdev->driver_override); + len = sysfs_emit(buf, "%s\n", vdev->driver_override); 
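
The snprintf() to sysfs_emit() conversion in this hunk is the idiomatic pattern for sysfs show() callbacks: sysfs_emit() knows the buffer is a full page and refuses to write past it. A self-contained sketch of the pattern (hypothetical attribute, not from this patch):

/* Hypothetical read-only device attribute using sysfs_emit(); it
 * replaces the error-prone snprintf(buf, PAGE_SIZE, ...) idiom and
 * returns the number of bytes written.
 */
static ssize_t example_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%s\n", dev_name(dev));
}
static DEVICE_ATTR_RO(example);
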
device_unlock(dev); return len; @@ -115,7 +115,7 @@ static const struct attribute_group vdpa_dev_group = { }; __ATTRIBUTE_GROUPS(vdpa_dev); -static struct bus_type vdpa_bus = { +static const struct bus_type vdpa_bus = { .name = "vdpa", .dev_groups = vdpa_dev_groups, .match = vdpa_dev_match, @@ -945,6 +945,215 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms } static int +vdpa_dev_blk_capacity_config_fill(struct sk_buff *msg, + const struct virtio_blk_config *config) +{ + u64 val_u64; + + val_u64 = __virtio64_to_cpu(true, config->capacity); + + return nla_put_u64_64bit(msg, VDPA_ATTR_DEV_BLK_CFG_CAPACITY, + val_u64, VDPA_ATTR_PAD); +} + +static int +vdpa_dev_blk_seg_size_config_fill(struct sk_buff *msg, u64 features, + const struct virtio_blk_config *config) +{ + u32 val_u32; + + if ((features & BIT_ULL(VIRTIO_BLK_F_SIZE_MAX)) == 0) + return 0; + + val_u32 = __virtio32_to_cpu(true, config->size_max); + + return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_SIZE_MAX, val_u32); +} + +/* fill the block size*/ +static int +vdpa_dev_blk_block_size_config_fill(struct sk_buff *msg, u64 features, + const struct virtio_blk_config *config) +{ + u32 val_u32; + + if ((features & BIT_ULL(VIRTIO_BLK_F_BLK_SIZE)) == 0) + return 0; + + val_u32 = __virtio32_to_cpu(true, config->blk_size); + + return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_BLK_SIZE, val_u32); +} + +static int +vdpa_dev_blk_seg_max_config_fill(struct sk_buff *msg, u64 features, + const struct virtio_blk_config *config) +{ + u32 val_u32; + + if ((features & BIT_ULL(VIRTIO_BLK_F_SEG_MAX)) == 0) + return 0; + + val_u32 = __virtio32_to_cpu(true, config->seg_max); + + return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_SEG_MAX, val_u32); +} + +static int vdpa_dev_blk_mq_config_fill(struct sk_buff *msg, u64 features, + const struct virtio_blk_config *config) +{ + u16 val_u16; + + if ((features & BIT_ULL(VIRTIO_BLK_F_MQ)) == 0) + return 0; + + val_u16 = __virtio16_to_cpu(true, config->num_queues); + + return nla_put_u16(msg, VDPA_ATTR_DEV_BLK_CFG_NUM_QUEUES, val_u16); +} + +static int vdpa_dev_blk_topology_config_fill(struct sk_buff *msg, u64 features, + const struct virtio_blk_config *config) +{ + u16 min_io_size; + u32 opt_io_size; + + if ((features & BIT_ULL(VIRTIO_BLK_F_TOPOLOGY)) == 0) + return 0; + + min_io_size = __virtio16_to_cpu(true, config->min_io_size); + opt_io_size = __virtio32_to_cpu(true, config->opt_io_size); + + if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_CFG_PHY_BLK_EXP, + config->physical_block_exp)) + return -EMSGSIZE; + + if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_CFG_ALIGN_OFFSET, + config->alignment_offset)) + return -EMSGSIZE; + + if (nla_put_u16(msg, VDPA_ATTR_DEV_BLK_CFG_MIN_IO_SIZE, min_io_size)) + return -EMSGSIZE; + + if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_OPT_IO_SIZE, opt_io_size)) + return -EMSGSIZE; + + return 0; +} + +static int vdpa_dev_blk_discard_config_fill(struct sk_buff *msg, u64 features, + const struct virtio_blk_config *config) +{ + u32 val_u32; + + if ((features & BIT_ULL(VIRTIO_BLK_F_DISCARD)) == 0) + return 0; + + val_u32 = __virtio32_to_cpu(true, config->max_discard_sectors); + if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_MAX_DISCARD_SEC, val_u32)) + return -EMSGSIZE; + + val_u32 = __virtio32_to_cpu(true, config->max_discard_seg); + if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_MAX_DISCARD_SEG, val_u32)) + return -EMSGSIZE; + + val_u32 = __virtio32_to_cpu(true, config->discard_sector_alignment); + if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_DISCARD_SEC_ALIGN, val_u32)) + return -EMSGSIZE; + + 
return 0; +} + +static int +vdpa_dev_blk_write_zeroes_config_fill(struct sk_buff *msg, u64 features, + const struct virtio_blk_config *config) +{ + u32 val_u32; + + if ((features & BIT_ULL(VIRTIO_BLK_F_WRITE_ZEROES)) == 0) + return 0; + + val_u32 = __virtio32_to_cpu(true, config->max_write_zeroes_sectors); + if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_MAX_WRITE_ZEROES_SEC, val_u32)) + return -EMSGSIZE; + + val_u32 = __virtio32_to_cpu(true, config->max_write_zeroes_seg); + if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_MAX_WRITE_ZEROES_SEG, val_u32)) + return -EMSGSIZE; + + return 0; +} + +static int vdpa_dev_blk_ro_config_fill(struct sk_buff *msg, u64 features) +{ + u8 ro; + + ro = ((features & BIT_ULL(VIRTIO_BLK_F_RO)) == 0) ? 0 : 1; + if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_READ_ONLY, ro)) + return -EMSGSIZE; + + return 0; +} + +static int vdpa_dev_blk_flush_config_fill(struct sk_buff *msg, u64 features) +{ + u8 flush; + + flush = ((features & BIT_ULL(VIRTIO_BLK_F_FLUSH)) == 0) ? 0 : 1; + if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_FLUSH, flush)) + return -EMSGSIZE; + + return 0; +} + +static int vdpa_dev_blk_config_fill(struct vdpa_device *vdev, + struct sk_buff *msg) +{ + struct virtio_blk_config config = {}; + u64 features_device; + + vdev->config->get_config(vdev, 0, &config, sizeof(config)); + + features_device = vdev->config->get_device_features(vdev); + + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_FEATURES, features_device, + VDPA_ATTR_PAD)) + return -EMSGSIZE; + + if (vdpa_dev_blk_capacity_config_fill(msg, &config)) + return -EMSGSIZE; + + if (vdpa_dev_blk_seg_size_config_fill(msg, features_device, &config)) + return -EMSGSIZE; + + if (vdpa_dev_blk_block_size_config_fill(msg, features_device, &config)) + return -EMSGSIZE; + + if (vdpa_dev_blk_seg_max_config_fill(msg, features_device, &config)) + return -EMSGSIZE; + + if (vdpa_dev_blk_mq_config_fill(msg, features_device, &config)) + return -EMSGSIZE; + + if (vdpa_dev_blk_topology_config_fill(msg, features_device, &config)) + return -EMSGSIZE; + + if (vdpa_dev_blk_discard_config_fill(msg, features_device, &config)) + return -EMSGSIZE; + + if (vdpa_dev_blk_write_zeroes_config_fill(msg, features_device, &config)) + return -EMSGSIZE; + + if (vdpa_dev_blk_ro_config_fill(msg, features_device)) + return -EMSGSIZE; + + if (vdpa_dev_blk_flush_config_fill(msg, features_device)) + return -EMSGSIZE; + + return 0; +} + +static int vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { @@ -988,6 +1197,9 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, case VIRTIO_ID_NET: err = vdpa_dev_net_config_fill(vdev, msg); break; + case VIRTIO_ID_BLOCK: + err = vdpa_dev_blk_config_fill(vdev, msg); + break; default: err = -EOPNOTSUPP; break; @@ -1149,6 +1361,80 @@ dev_err: return err; } +static int vdpa_dev_net_device_attr_set(struct vdpa_device *vdev, + struct genl_info *info) +{ + struct vdpa_dev_set_config set_config = {}; + struct vdpa_mgmt_dev *mdev = vdev->mdev; + struct nlattr **nl_attrs = info->attrs; + const u8 *macaddr; + int err = -EOPNOTSUPP; + + down_write(&vdev->cf_lock); + if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]) { + set_config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR); + macaddr = nla_data(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]); + + if (is_valid_ether_addr(macaddr)) { + ether_addr_copy(set_config.net.mac, macaddr); + if (mdev->ops->dev_set_attr) { + err = mdev->ops->dev_set_attr(mdev, vdev, + &set_config); + } else { + 
NL_SET_ERR_MSG_FMT_MOD(info->extack, + "Operation not supported by the device."); + } + } else { + NL_SET_ERR_MSG_FMT_MOD(info->extack, + "Invalid MAC address"); + } + } + up_write(&vdev->cf_lock); + return err; +} + +static int vdpa_nl_cmd_dev_attr_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct vdpa_device *vdev; + struct device *dev; + const char *name; + u64 classes; + int err = 0; + + if (!info->attrs[VDPA_ATTR_DEV_NAME]) + return -EINVAL; + + name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); + + down_write(&vdpa_dev_lock); + dev = bus_find_device(&vdpa_bus, NULL, name, vdpa_name_match); + if (!dev) { + NL_SET_ERR_MSG_MOD(info->extack, "device not found"); + err = -ENODEV; + goto dev_err; + } + vdev = container_of(dev, struct vdpa_device, dev); + if (!vdev->mdev) { + NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device"); + err = -EINVAL; + goto mdev_err; + } + classes = vdpa_mgmtdev_get_classes(vdev->mdev, NULL); + if (classes & BIT_ULL(VIRTIO_ID_NET)) { + err = vdpa_dev_net_device_attr_set(vdev, info); + } else { + NL_SET_ERR_MSG_FMT_MOD(info->extack, "%s device not supported", + name); + } + +mdev_err: + put_device(dev); +dev_err: + up_write(&vdpa_dev_lock); + return err; +} + static int vdpa_dev_config_dump(struct device *dev, void *data) { struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev); @@ -1285,6 +1571,11 @@ static const struct genl_ops vdpa_nl_ops[] = { .doit = vdpa_nl_cmd_dev_stats_get_doit, .flags = GENL_ADMIN_PERM, }, + { + .cmd = VDPA_CMD_DEV_ATTR_SET, + .doit = vdpa_nl_cmd_dev_attr_set_doit, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_family vdpa_nl_family __ro_after_init = { @@ -1326,4 +1617,5 @@ core_initcall(vdpa_init); module_exit(vdpa_exit); MODULE_AUTHOR("Jason Wang <jasowang@redhat.com>"); +MODULE_DESCRIPTION("vDPA bus"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index be2925d0d283..c204fc8e471a 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -160,7 +160,7 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim, u32 flags) } } - vdpasim->running = true; + vdpasim->running = false; spin_unlock(&vdpasim->iommu_lock); vdpasim->features = 0; @@ -229,7 +229,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr, dev = &vdpasim->vdpa.dev; kthread_init_work(&vdpasim->work, vdpasim_work_fn); - vdpasim->worker = kthread_create_worker(0, "vDPA sim worker: %s", + vdpasim->worker = kthread_run_worker(0, "vDPA sim worker: %s", dev_attr->name); if (IS_ERR(vdpasim->worker)) goto err_iommu; @@ -311,6 +311,17 @@ static void vdpasim_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num) vq->num = num; } +static u16 vdpasim_get_vq_size(struct vdpa_device *vdpa, u16 idx) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + + if (vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK) + return vq->num; + else + return VDPASIM_QUEUE_MAX; +} + static void vdpasim_kick_vq(struct vdpa_device *vdpa, u16 idx) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); @@ -483,6 +494,7 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status) mutex_lock(&vdpasim->mutex); vdpasim->status = status; + vdpasim->running = (status & VIRTIO_CONFIG_S_DRIVER_OK) != 0; mutex_unlock(&vdpasim->mutex); } @@ -774,6 +786,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = { .get_driver_features = vdpasim_get_driver_features, .set_config_cb = vdpasim_set_config_cb, .get_vq_num_max = 
vdpasim_get_vq_num_max, + .get_vq_size = vdpasim_get_vq_size, .get_device_id = vdpasim_get_device_id, .get_vendor_id = vdpasim_get_vendor_id, .get_status = vdpasim_get_status, diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index cfe962911804..6caf09a1907b 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -414,6 +414,24 @@ static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config) net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); } +static int vdpasim_net_set_attr(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev, + const struct vdpa_dev_set_config *config) +{ + struct vdpasim *vdpasim = container_of(dev, struct vdpasim, vdpa); + struct virtio_net_config *vio_config = vdpasim->config; + + mutex_lock(&vdpasim->mutex); + + if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) { + ether_addr_copy(vio_config->mac, config->net.mac); + mutex_unlock(&vdpasim->mutex); + return 0; + } + + mutex_unlock(&vdpasim->mutex); + return -EOPNOTSUPP; +} + static void vdpasim_net_setup_config(struct vdpasim *vdpasim, const struct vdpa_dev_set_config *config) { @@ -510,7 +528,8 @@ static void vdpasim_net_dev_del(struct vdpa_mgmt_dev *mdev, static const struct vdpa_mgmtdev_ops vdpasim_net_mgmtdev_ops = { .dev_add = vdpasim_net_dev_add, - .dev_del = vdpasim_net_dev_del + .dev_del = vdpasim_net_dev_del, + .dev_set_attr = vdpasim_net_set_attr }; static struct virtio_device_id id_table[] = { diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c index 5e4a77b9bae6..58116f89d8da 100644 --- a/drivers/vdpa/vdpa_user/iova_domain.c +++ b/drivers/vdpa/vdpa_user/iova_domain.c @@ -162,6 +162,7 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain, enum dma_data_direction dir) { struct vduse_bounce_map *map; + struct page *page; unsigned int offset; void *addr; size_t sz; @@ -178,7 +179,10 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain, map->orig_phys == INVALID_PHYS_ADDR)) return; - addr = kmap_local_page(map->bounce_page); + page = domain->user_bounce_pages ? 
+ map->user_bounce_page : map->bounce_page; + + addr = kmap_local_page(page); do_bounce(map->orig_phys + offset, addr + offset, sz, dir); kunmap_local(addr); size -= sz; @@ -270,9 +274,8 @@ int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain, memcpy_to_page(pages[i], 0, page_address(map->bounce_page), PAGE_SIZE); - __free_page(map->bounce_page); } - map->bounce_page = pages[i]; + map->user_bounce_page = pages[i]; get_page(pages[i]); } domain->user_bounce_pages = true; @@ -297,17 +300,17 @@ void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain) struct page *page = NULL; map = &domain->bounce_maps[i]; - if (WARN_ON(!map->bounce_page)) + if (WARN_ON(!map->user_bounce_page)) continue; /* Copy user page to kernel page if it's in use */ if (map->orig_phys != INVALID_PHYS_ADDR) { - page = alloc_page(GFP_ATOMIC | __GFP_NOFAIL); + page = map->bounce_page; memcpy_from_page(page_address(page), - map->bounce_page, 0, PAGE_SIZE); + map->user_bounce_page, 0, PAGE_SIZE); } - put_page(map->bounce_page); - map->bounce_page = page; + put_page(map->user_bounce_page); + map->user_bounce_page = NULL; } domain->user_bounce_pages = false; out: @@ -373,6 +376,26 @@ static void vduse_domain_free_iova(struct iova_domain *iovad, free_iova_fast(iovad, iova >> shift, iova_len); } +void vduse_domain_sync_single_for_device(struct vduse_iova_domain *domain, + dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir) +{ + read_lock(&domain->bounce_lock); + if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) + vduse_domain_bounce(domain, dma_addr, size, DMA_TO_DEVICE); + read_unlock(&domain->bounce_lock); +} + +void vduse_domain_sync_single_for_cpu(struct vduse_iova_domain *domain, + dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir) +{ + read_lock(&domain->bounce_lock); + if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) + vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE); + read_unlock(&domain->bounce_lock); +} + dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, @@ -393,7 +416,8 @@ dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain, if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa)) goto err_unlock; - if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE); read_unlock(&domain->bounce_lock); @@ -411,9 +435,9 @@ void vduse_domain_unmap_page(struct vduse_iova_domain *domain, enum dma_data_direction dir, unsigned long attrs) { struct iova_domain *iovad = &domain->stream_iovad; - read_lock(&domain->bounce_lock); - if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE); vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size); diff --git a/drivers/vdpa/vdpa_user/iova_domain.h b/drivers/vdpa/vdpa_user/iova_domain.h index 173e979b84a9..7f3f0928ec78 100644 --- a/drivers/vdpa/vdpa_user/iova_domain.h +++ b/drivers/vdpa/vdpa_user/iova_domain.h @@ -21,6 +21,7 @@ struct vduse_bounce_map { struct page *bounce_page; + struct page *user_bounce_page; u64 orig_phys; }; @@ -44,6 +45,14 @@ int vduse_domain_set_map(struct vduse_iova_domain *domain, void vduse_domain_clear_map(struct vduse_iova_domain *domain, struct 
vhost_iotlb *iotlb); +void vduse_domain_sync_single_for_device(struct vduse_iova_domain *domain, + dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir); + +void vduse_domain_sync_single_for_cpu(struct vduse_iova_domain *domain, + dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir); + dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 1d24da79c399..7ae99691efdf 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -8,6 +8,7 @@ * */ +#include "linux/virtio_net.h" #include <linux/init.h> #include <linux/module.h> #include <linux/cdev.h> @@ -28,6 +29,7 @@ #include <uapi/linux/virtio_config.h> #include <uapi/linux/virtio_ids.h> #include <uapi/linux/virtio_blk.h> +#include <uapi/linux/virtio_ring.h> #include <linux/mod_devicetable.h> #include "iova_domain.h" @@ -141,6 +143,7 @@ static struct workqueue_struct *vduse_irq_bound_wq; static u32 allowed_device_id[] = { VIRTIO_ID_BLOCK, + VIRTIO_ID_NET, }; static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa) @@ -541,6 +544,17 @@ static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num) vq->num = num; } +static u16 vduse_vdpa_get_vq_size(struct vdpa_device *vdpa, u16 idx) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + struct vduse_virtqueue *vq = dev->vqs[idx]; + + if (vq->num) + return vq->num; + else + return vq->num_max; +} + static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa, u16 idx, bool ready) { @@ -773,6 +787,7 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = { .kick_vq = vduse_vdpa_kick_vq, .set_vq_cb = vduse_vdpa_set_vq_cb, .set_vq_num = vduse_vdpa_set_vq_num, + .get_vq_size = vduse_vdpa_get_vq_size, .set_vq_ready = vduse_vdpa_set_vq_ready, .get_vq_ready = vduse_vdpa_get_vq_ready, .set_vq_state = vduse_vdpa_set_vq_state, @@ -798,6 +813,26 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = { .free = vduse_vdpa_free, }; +static void vduse_dev_sync_single_for_device(struct device *dev, + dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir) +{ + struct vduse_dev *vdev = dev_to_vduse(dev); + struct vduse_iova_domain *domain = vdev->domain; + + vduse_domain_sync_single_for_device(domain, dma_addr, size, dir); +} + +static void vduse_dev_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir) +{ + struct vduse_dev *vdev = dev_to_vduse(dev); + struct vduse_iova_domain *domain = vdev->domain; + + vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir); +} + static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, @@ -858,6 +893,8 @@ static size_t vduse_dev_max_mapping_size(struct device *dev) } static const struct dma_map_ops vduse_dev_dma_ops = { + .sync_single_for_device = vduse_dev_sync_single_for_device, + .sync_single_for_cpu = vduse_dev_sync_single_for_cpu, .map_page = vduse_dev_map_page, .unmap_page = vduse_dev_unmap_page, .alloc = vduse_dev_alloc_coherent, @@ -1671,13 +1708,21 @@ static bool device_is_allowed(u32 device_id) return false; } -static bool features_is_valid(u64 features) +static bool features_is_valid(struct vduse_dev_config *config) { - if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) + if (!(config->features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) 
return false; /* Now we only support read-only configuration space */ - if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE)) + if ((config->device_id == VIRTIO_ID_BLOCK) && + (config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE))) + return false; + else if ((config->device_id == VIRTIO_ID_NET) && + (config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) + return false; + + if ((config->device_id == VIRTIO_ID_NET) && + !(config->features & BIT_ULL(VIRTIO_F_VERSION_1))) return false; return true; @@ -1704,7 +1749,7 @@ static bool vduse_validate_config(struct vduse_dev_config *config) if (!device_is_allowed(config->device_id)) return false; - if (!features_is_valid(config->features)) + if (!features_is_valid(config)) return false; return true; @@ -1787,6 +1832,10 @@ static int vduse_create_dev(struct vduse_dev_config *config, int ret; struct vduse_dev *dev; + ret = -EPERM; + if ((config->device_id == VIRTIO_ID_NET) && !capable(CAP_NET_ADMIN)) + goto err; + ret = -EEXIST; if (vduse_find_dev(config->name)) goto err; @@ -2030,6 +2079,7 @@ static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = { static struct virtio_device_id id_table[] = { { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, + { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, { 0 }, }; diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index 281287fae89f..8787407f75b0 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -160,7 +160,13 @@ static int vp_vdpa_request_irq(struct vp_vdpa *vp_vdpa) struct pci_dev *pdev = mdev->pci_dev; int i, ret, irq; int queues = vp_vdpa->queues; - int vectors = queues + 1; + int vectors = 1; + int msix_vec = 0; + + for (i = 0; i < queues; i++) { + if (vp_vdpa->vring[i].cb.callback) + vectors++; + } ret = pci_alloc_irq_vectors(pdev, vectors, vectors, PCI_IRQ_MSIX); if (ret != vectors) { @@ -173,9 +179,12 @@ static int vp_vdpa_request_irq(struct vp_vdpa *vp_vdpa) vp_vdpa->vectors = vectors; for (i = 0; i < queues; i++) { + if (!vp_vdpa->vring[i].cb.callback) + continue; + snprintf(vp_vdpa->vring[i].msix_name, VP_VDPA_NAME_SIZE, "vp-vdpa[%s]-%d\n", pci_name(pdev), i); - irq = pci_irq_vector(pdev, i); + irq = pci_irq_vector(pdev, msix_vec); ret = devm_request_irq(&pdev->dev, irq, vp_vdpa_vq_handler, 0, vp_vdpa->vring[i].msix_name, @@ -185,21 +194,22 @@ static int vp_vdpa_request_irq(struct vp_vdpa *vp_vdpa) "vp_vdpa: fail to request irq for vq %d\n", i); goto err; } - vp_modern_queue_vector(mdev, i, i); + vp_modern_queue_vector(mdev, i, msix_vec); vp_vdpa->vring[i].irq = irq; + msix_vec++; } snprintf(vp_vdpa->msix_name, VP_VDPA_NAME_SIZE, "vp-vdpa[%s]-config\n", pci_name(pdev)); - irq = pci_irq_vector(pdev, queues); + irq = pci_irq_vector(pdev, msix_vec); ret = devm_request_irq(&pdev->dev, irq, vp_vdpa_config_handler, 0, vp_vdpa->msix_name, vp_vdpa); if (ret) { dev_err(&pdev->dev, - "vp_vdpa: fail to request irq for vq %d\n", i); + "vp_vdpa: fail to request irq for config: %d\n", ret); goto err; } - vp_modern_config_vector(mdev, queues); + vp_modern_config_vector(mdev, msix_vec); vp_vdpa->config_irq = irq; return 0; @@ -216,7 +226,10 @@ static void vp_vdpa_set_status(struct vdpa_device *vdpa, u8 status) if (status & VIRTIO_CONFIG_S_DRIVER_OK && !(s & VIRTIO_CONFIG_S_DRIVER_OK)) { - vp_vdpa_request_irq(vp_vdpa); + if (vp_vdpa_request_irq(vp_vdpa)) { + WARN_ON(1); + return; + } } vp_modern_set_status(mdev, status); @@ -328,6 +341,13 @@ static void vp_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 qid, vp_modern_set_queue_size(mdev, qid, num); } +static u16 
vp_vdpa_get_vq_size(struct vdpa_device *vdpa, u16 qid) +{ + struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa); + + return vp_modern_get_queue_size(mdev, qid); +} + static int vp_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 qid, u64 desc_area, u64 driver_area, u64 device_area) @@ -347,6 +367,14 @@ static void vp_vdpa_kick_vq(struct vdpa_device *vdpa, u16 qid) vp_iowrite16(qid, vp_vdpa->vring[qid].notify); } +static void vp_vdpa_kick_vq_with_data(struct vdpa_device *vdpa, u32 data) +{ + struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); + u16 qid = data & 0xFFFF; + + vp_iowrite32(data, vp_vdpa->vring[qid].notify); +} + static u32 vp_vdpa_get_generation(struct vdpa_device *vdpa) { struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa); @@ -449,8 +477,10 @@ static const struct vdpa_config_ops vp_vdpa_ops = { .set_vq_ready = vp_vdpa_set_vq_ready, .get_vq_ready = vp_vdpa_get_vq_ready, .set_vq_num = vp_vdpa_set_vq_num, + .get_vq_size = vp_vdpa_get_vq_size, .set_vq_address = vp_vdpa_set_vq_address, .kick_vq = vp_vdpa_kick_vq, + .kick_vq_with_data = vp_vdpa_kick_vq_with_data, .get_generation = vp_vdpa_get_generation, .get_device_id = vp_vdpa_get_device_id, .get_vendor_id = vp_vdpa_get_vendor_id, @@ -591,7 +621,11 @@ static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto mdev_err; } - mdev_id = kzalloc(sizeof(struct virtio_device_id), GFP_KERNEL); + /* + * id_table should be a null terminated array, so allocate one additional + * entry here, see vdpa_mgmtdev_get_classes(). + */ + mdev_id = kcalloc(2, sizeof(struct virtio_device_id), GFP_KERNEL); if (!mdev_id) { err = -ENOMEM; goto mdev_id_err; } @@ -611,8 +645,8 @@ static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto probe_err; } - mdev_id->device = mdev->id.device; - mdev_id->vendor = mdev->id.vendor; + mdev_id[0].device = mdev->id.device; + mdev_id[0].vendor = mdev->id.vendor; mgtdev->id_table = mdev_id; mgtdev->max_supported_vqs = vp_modern_get_num_queues(mdev); mgtdev->supported_features = vp_modern_get_features(mdev);
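A note on the octep_vdpa hunk above: the driver registers a .sriov_configure hook alongside the shared PF/VF probe entry. The driver-core contract for that hook is that a count of zero disables SR-IOV while a positive count enables VFs and returns the number actually brought up. The Octeon implementation itself is not shown in this diff; the sketch below is only the generic shape, with hypothetical names:

#include <linux/pci.h>

/* Generic .sriov_configure shape (hypothetical driver): 0 disables
 * SR-IOV, a positive num_vfs enables VFs and reports how many were
 * actually enabled. */
static int example_sriov_configure(struct pci_dev *pdev, int num_vfs)
{
	int ret;

	if (num_vfs == 0) {
		pci_disable_sriov(pdev);
		return 0;
	}

	ret = pci_enable_sriov(pdev, num_vfs);
	return ret ? ret : num_vfs;
}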
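The pds_vdpa_dev_add() change above rejects a device whose status register reads 0xff. The reasoning: reads from a PCI function that has been surprise-removed (or whose firmware has crashed) complete as all-ones, and 0xff is never a legal virtio status value, so the add path can fail fast with -ENXIO instead of registering a dead device. A minimal sketch of the same check, names hypothetical:

#include <linux/errno.h>
#include <linux/types.h>

/* All-ones from a status read means the PCI function is gone or
 * wedged; no virtio status byte is ever legitimately 0xff. */
static int example_validate_status(u8 status)
{
	return (status == 0xff) ? -ENXIO : 0;
}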
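The snet_main.c rework above moves from the mask-based pcim_iomap_regions()/pcim_iomap_table() pair to per-BAR pcim_iomap_region() calls, which return the mapping (or an ERR_PTR) directly and leave cleanup to devres. A condensed sketch of the per-BAR loop under those assumptions, names hypothetical:

#include <linux/pci.h>

/* Map every BAR with a non-zero length; devres unmaps automatically
 * on driver detach, so the error path needs no manual unwinding. */
static int example_map_bars(struct pci_dev *pdev, void __iomem *bars[])
{
	unsigned short i;

	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
		void __iomem *io;

		if (!pci_resource_len(pdev, i))
			continue;

		io = pcim_iomap_region(pdev, i, "example-bars");
		if (IS_ERR(io))
			return PTR_ERR(io);

		bars[i] = io;
	}

	return 0;
}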
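Each of the new vdpa_dev_blk_*_config_fill() helpers above has the same shape: return 0 (emit nothing) when the gating feature bit was not offered, otherwise convert the field from the device's little-endian layout and put a single netlink attribute. A generic sketch of that pattern, helper name hypothetical:

#include <linux/bits.h>
#include <linux/virtio_byteorder.h>
#include <net/netlink.h>

/* Emit a u32 config field only when the feature bit that defines it
 * was offered by the device; returning 0 simply skips the attribute. */
static int example_fill_gated_u32(struct sk_buff *msg, u64 features,
				  int feature_bit, int attrtype,
				  __virtio32 raw)
{
	if (!(features & BIT_ULL(feature_bit)))
		return 0;

	return nla_put_u32(msg, attrtype, __virtio32_to_cpu(true, raw));
}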
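vdpa_nl_cmd_dev_attr_set_doit() above follows the usual driver-core lookup discipline: bus_find_device() returns a referenced struct device (or NULL), and every successful lookup must be balanced by put_device() on all exit paths, which is what the mdev_err/dev_err labels arrange. A condensed sketch of just that contract, names hypothetical:

#include <linux/device.h>
#include <linux/errno.h>

/* Look up a device by name on a bus, act on it, and always drop the
 * reference taken by bus_find_device(). */
static int example_act_on_named_dev(const struct bus_type *bus,
				    const char *name,
				    int (*match)(struct device *dev,
						 const void *data))
{
	struct device *dev;
	int err = 0;

	dev = bus_find_device(bus, NULL, name, match);
	if (!dev)
		return -ENODEV;

	/* ... operate on the device under whatever locks apply ... */

	put_device(dev);
	return err;
}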
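Several drivers in this series (eni, ifcvf, vdpasim, vduse, vp_vdpa) gain a .get_vq_size op. It is an optional member of vdpa_config_ops, so a consumer that wants a per-queue size must be prepared to fall back to the device-wide maximum. Illustrative helper only, not code from this patch set:

#include <linux/vdpa.h>

/* Prefer the per-queue size when the driver implements the new op,
 * otherwise fall back to the device-wide maximum. */
static u16 example_vq_size(struct vdpa_device *vdev, u16 idx)
{
	const struct vdpa_config_ops *ops = vdev->config;

	if (ops->get_vq_size)
		return ops->get_vq_size(vdev, idx);

	return ops->get_vq_num_max(vdev);
}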
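The iova_domain.c and vduse_dev.c changes above make VDUSE honour DMA_ATTR_SKIP_CPU_SYNC on map/unmap and expose sync_single_for_{cpu,device} callbacks, so a consumer can defer bouncing until the buffer actually changes hands. The caller-side pairing looks roughly like this (generic DMA API usage, not VDUSE-specific code):

#include <linux/dma-mapping.h>

/* Map without the implicit bounce, then sync explicitly once the
 * buffer contents are final and ownership moves to the device. */
static dma_addr_t example_map_deferred(struct device *dev,
				       struct page *page, size_t len)
{
	dma_addr_t dma;

	dma = dma_map_page_attrs(dev, page, 0, len, DMA_TO_DEVICE,
				 DMA_ATTR_SKIP_CPU_SYNC);
	if (dma_mapping_error(dev, dma))
		return DMA_MAPPING_ERROR;

	/* ... fill the buffer ... */

	dma_sync_single_for_device(dev, dma, len, DMA_TO_DEVICE);
	return dma;
}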
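features_is_valid() and vduse_create_dev() above spell out what a userspace-backed virtio-net device must look like: VIRTIO_F_ACCESS_PLATFORM and VIRTIO_F_VERSION_1 offered, no VIRTIO_NET_F_CTRL_VQ, and CAP_NET_ADMIN on the creating process. A userspace sketch of a config that passes those checks, with error handling and the initial config space elided (a real device would append a struct virtio_net_config and set config_size accordingly):

#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/vduse.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ids.h>

int example_create_vduse_net(void)
{
	int ctrl = open("/dev/vduse/control", O_RDWR);
	struct vduse_dev_config *cfg = calloc(1, sizeof(*cfg));

	strncpy(cfg->name, "vduse-net0", sizeof(cfg->name) - 1);
	cfg->device_id = VIRTIO_ID_NET;
	cfg->features = (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
			(1ULL << VIRTIO_F_VERSION_1);
	cfg->vq_num = 2;	/* one rx/tx pair, no control queue */
	cfg->vq_align = 4096;
	cfg->config_size = 0;	/* see note above */

	return ioctl(ctrl, VDUSE_CREATE_DEV, cfg);
}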
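Finally, vp_vdpa_kick_vq_with_data() above forwards the full 32-bit VIRTIO_F_NOTIFICATION_DATA payload to the queue's notify address, using only the low half to pick the doorbell. Per the virtio spec, for a split virtqueue the low 16 bits carry the vq index and the high 16 bits the next avail index (packed rings additionally steal bit 31 for the wrap counter). An illustrative encoder for the split-ring case:

#include <linux/types.h>

/* Split-ring notification data: bits 15:0 = vq index,
 * bits 31:16 = next available index. */
static u32 example_notification_data(u16 qid, u16 next_avail)
{
	return ((u32)next_avail << 16) | qid;
}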