diff options
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r-- | drivers/infiniband/hw/mlx5/ah.c | 33 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/cmd.c | 155 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/cmd.h | 8 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/cq.c | 47 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/devx.c | 32 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/flow.c | 99 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/ib_rep.c | 109 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/ib_rep.h | 13 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 662 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mlx5_ib.h | 118 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c | 52 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/odp.c | 132 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/qp.c | 161 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/srq.c | 76 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/srq.h | 7 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/srq_cmd.c | 35 |
16 files changed, 1095 insertions, 644 deletions
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 420ae0897333..80642dd359bc 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -32,9 +32,8 @@ #include "mlx5_ib.h" -static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, - struct mlx5_ib_ah *ah, - struct rdma_ah_attr *ah_attr) +static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, + struct rdma_ah_attr *ah_attr) { enum ib_gid_type gid_type; @@ -67,21 +66,19 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f; ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0xf); } - - return &ah->ibah; } -struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { - struct mlx5_ib_ah *ah; - struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_ah *ah = to_mah(ibah); + struct mlx5_ib_dev *dev = to_mdev(ibah->device); enum rdma_ah_attr_type ah_type = ah_attr->type; if ((ah_type == RDMA_AH_ATTR_TYPE_ROCE) && !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) - return ERR_PTR(-EINVAL); + return -EINVAL; if (ah_type == RDMA_AH_ATTR_TYPE_ROCE && udata) { int err; @@ -90,21 +87,18 @@ struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, sizeof(resp.dmac); if (udata->outlen < min_resp_len) - return ERR_PTR(-EINVAL); + return -EINVAL; resp.response_length = min_resp_len; memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN); err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) - return ERR_PTR(err); + return err; } - ah = kzalloc(sizeof(*ah), GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); - - return create_ib_ah(dev, ah, ah_attr); /* never fails */ + create_ib_ah(dev, ah, ah_attr); + return 0; } int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) @@ -131,8 +125,7 @@ int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags) +void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags) { - kfree(to_mah(ah)); - return 0; + return; } diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index be95ac5aeb30..e3ec79b8f7f5 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -82,10 +82,10 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out)); } -int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, - u64 length, u32 alignment) +int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, + u64 length, u32 alignment) { - struct mlx5_core_dev *dev = memic->dev; + struct mlx5_core_dev *dev = dm->dev; u64 num_memic_hw_pages = MLX5_CAP_DEV_MEM(dev, memic_bar_size) >> PAGE_SHIFT; u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr); @@ -115,17 +115,17 @@ int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, mlx5_alignment); while (page_idx < num_memic_hw_pages) { - spin_lock(&memic->memic_lock); - page_idx = bitmap_find_next_zero_area(memic->memic_alloc_pages, + spin_lock(&dm->lock); + page_idx = bitmap_find_next_zero_area(dm->memic_alloc_pages, num_memic_hw_pages, page_idx, num_pages, 0); if (page_idx < num_memic_hw_pages) - bitmap_set(memic->memic_alloc_pages, + bitmap_set(dm->memic_alloc_pages, page_idx, num_pages); - spin_unlock(&memic->memic_lock); + spin_unlock(&dm->lock); if (page_idx >= num_memic_hw_pages) break; @@ -135,10 +135,10 @@ int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (ret) { - spin_lock(&memic->memic_lock); - bitmap_clear(memic->memic_alloc_pages, + spin_lock(&dm->lock); + bitmap_clear(dm->memic_alloc_pages, page_idx, num_pages); - spin_unlock(&memic->memic_lock); + spin_unlock(&dm->lock); if (ret == -EAGAIN) { page_idx++; @@ -157,9 +157,9 @@ int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, return -ENOMEM; } -int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length) +int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length) { - struct mlx5_core_dev *dev = memic->dev; + struct mlx5_core_dev *dev = dm->dev; u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr); u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE); u32 out[MLX5_ST_SZ_DW(dealloc_memic_out)] = {0}; @@ -177,15 +177,140 @@ int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length) err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (!err) { - spin_lock(&memic->memic_lock); - bitmap_clear(memic->memic_alloc_pages, + spin_lock(&dm->lock); + bitmap_clear(dm->memic_alloc_pages, start_page_idx, num_pages); - spin_unlock(&memic->memic_lock); + spin_unlock(&dm->lock); } return err; } +int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, + u16 uid, phys_addr_t *addr, u32 *obj_id) +{ + struct mlx5_core_dev *dev = dm->dev; + u32 num_blocks = DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {}; + unsigned long *block_map; + u64 icm_start_addr; + u32 log_icm_size; + u32 max_blocks; + u64 block_idx; + void *sw_icm; + int ret; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); + MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); + + switch (type) { + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, + steering_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size); + block_map = dm->steering_sw_icm_alloc_blocks; + break; + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, + header_modify_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, + log_header_modify_sw_icm_size); + block_map = dm->header_modify_sw_icm_alloc_blocks; + break; + default: + return -EINVAL; + } + + max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + spin_lock(&dm->lock); + block_idx = bitmap_find_next_zero_area(block_map, + max_blocks, + 0, + num_blocks, 0); + + if (block_idx < max_blocks) + bitmap_set(block_map, + block_idx, num_blocks); + + spin_unlock(&dm->lock); + + if (block_idx >= max_blocks) + return -ENOMEM; + + sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm); + icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr, + icm_start_addr); + MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length)); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) { + spin_lock(&dm->lock); + bitmap_clear(block_map, + block_idx, num_blocks); + spin_unlock(&dm->lock); + + return ret; + } + + *addr = icm_start_addr; + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return 0; +} + +int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, + u16 uid, phys_addr_t addr, u32 obj_id) +{ + struct mlx5_core_dev *dev = dm->dev; + u32 num_blocks = DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + unsigned long *block_map; + u64 start_idx; + int err; + + switch (type) { + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + start_idx = + (addr - MLX5_CAP64_DEV_MEM( + dev, steering_sw_icm_start_address)) >> + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + block_map = dm->steering_sw_icm_alloc_blocks; + break; + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + start_idx = + (addr - + MLX5_CAP64_DEV_MEM( + dev, header_modify_sw_icm_start_address)) >> + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + block_map = dm->header_modify_sw_icm_alloc_blocks; + break; + default: + return -EINVAL; + } + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + spin_lock(&dm->lock); + bitmap_clear(block_map, + start_idx, num_blocks); + spin_unlock(&dm->lock); + + return 0; +} + int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out) { u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 923a7b93f507..0572dcba6eae 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -44,9 +44,9 @@ int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out); int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, void *in, int in_size); -int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, +int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, u64 length, u32 alignment); -int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length); +int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length); void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid); void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid); void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid); @@ -65,4 +65,8 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, u16 uid); int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, u16 opmod, u8 port); +int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, + u16 uid, phys_addr_t *addr, u32 *obj_id); +int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, + u16 uid, phys_addr_t addr, u32 obj_id); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 18704e503508..2e2e65f00257 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -679,8 +679,7 @@ static int mini_cqe_res_format_to_hw(struct mlx5_ib_dev *dev, u8 format) } static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, - struct ib_ucontext *context, struct mlx5_ib_cq *cq, - int entries, u32 **cqb, + struct mlx5_ib_cq *cq, int entries, u32 **cqb, int *cqe_size, int *index, int *inlen) { struct mlx5_ib_create_cq ucmd = {}; @@ -691,6 +690,8 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, int ncont; void *cqc; int err; + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); ucmdlen = udata->inlen < sizeof(ucmd) ? (sizeof(ucmd) - sizeof(ucmd.flags)) : sizeof(ucmd); @@ -715,8 +716,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, return err; } - err = mlx5_ib_db_map_user(to_mucontext(context), udata, ucmd.db_addr, - &cq->db); + err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &cq->db); if (err) goto err_umem; @@ -740,7 +740,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, MLX5_SET(cqc, cqc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - *index = to_mucontext(context)->bfregi.sys_pages[0]; + *index = context->bfregi.sys_pages[0]; if (ucmd.cqe_comp_en == 1) { int mini_cqe_format; @@ -782,23 +782,26 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD; } - MLX5_SET(create_cq_in, *cqb, uid, to_mucontext(context)->devx_uid); + MLX5_SET(create_cq_in, *cqb, uid, context->devx_uid); return 0; err_cqb: kvfree(*cqb); err_db: - mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); + mlx5_ib_db_unmap_user(context, &cq->db); err_umem: ib_umem_release(cq->buf.umem); return err; } -static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context) +static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_udata *udata) { - mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); + + mlx5_ib_db_unmap_user(context, &cq->db); ib_umem_release(cq->buf.umem); } @@ -883,7 +886,6 @@ static void notify_soft_wc_handler(struct work_struct *work) struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { int entries = attr->cqe; @@ -923,9 +925,9 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, INIT_LIST_HEAD(&cq->list_send_qp); INIT_LIST_HEAD(&cq->list_recv_qp); - if (context) { - err = create_cq_user(dev, udata, context, cq, entries, - &cqb, &cqe_size, &index, &inlen); + if (udata) { + err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size, + &index, &inlen); if (err) goto err_create; } else { @@ -962,7 +964,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); cq->mcq.irqn = irqn; - if (context) + if (udata) cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; else cq->mcq.comp = mlx5_ib_cq_comp; @@ -970,7 +972,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, INIT_LIST_HEAD(&cq->wc_list); - if (context) + if (udata) if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) { err = -EFAULT; goto err_cmd; @@ -985,8 +987,8 @@ err_cmd: err_cqb: kvfree(cqb); - if (context) - destroy_cq_user(cq, context); + if (udata) + destroy_cq_user(cq, udata); else destroy_cq_kernel(dev, cq); @@ -996,19 +998,14 @@ err_create: return ERR_PTR(err); } - -int mlx5_ib_destroy_cq(struct ib_cq *cq) +int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(cq->device); struct mlx5_ib_cq *mcq = to_mcq(cq); - struct ib_ucontext *context = NULL; - - if (cq->uobject) - context = cq->uobject->context; mlx5_core_destroy_cq(dev->mdev, &mcq->mcq); - if (context) - destroy_cq_user(mcq, context); + if (udata) + destroy_cq_user(mcq, udata); else destroy_cq_kernel(dev, mcq); diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 9e08df7914aa..169ffffcf5ed 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -85,6 +85,10 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) if (is_user && capable(CAP_NET_RAW) && (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX)) cap |= MLX5_UCTX_CAP_RAW_TX; + if (is_user && capable(CAP_SYS_RAWIO) && + (MLX5_CAP_GEN(dev->mdev, uctx_cap) & + MLX5_UCTX_CAP_INTERNAL_DEV_RES)) + cap |= MLX5_UCTX_CAP_INTERNAL_DEV_RES; MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX); MLX5_SET(uctx, uctx, cap, cap); @@ -373,8 +377,10 @@ static u64 devx_get_obj_id(const void *in) return obj_id; } -static bool devx_is_valid_obj_id(struct ib_uobject *uobj, const void *in) +static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs, + struct ib_uobject *uobj, const void *in) { + struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); u64 obj_id = devx_get_obj_id(in); if (!obj_id) @@ -389,7 +395,6 @@ static bool devx_is_valid_obj_id(struct ib_uobject *uobj, const void *in) case UVERBS_OBJECT_SRQ: { struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq); - struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); u16 opcode; switch (srq->common.res) { @@ -681,6 +686,7 @@ static bool devx_is_whitelist_cmd(void *in) switch (opcode) { case MLX5_CMD_OP_QUERY_HCA_CAP: case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: return true; default: return false; @@ -718,6 +724,7 @@ static bool devx_is_general_cmd(void *in) switch (opcode) { case MLX5_CMD_OP_QUERY_HCA_CAP: case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: case MLX5_CMD_OP_QUERY_VPORT_STATE: case MLX5_CMD_OP_QUERY_ADAPTER: case MLX5_CMD_OP_QUERY_ISSI: @@ -1117,7 +1124,8 @@ static void devx_cleanup_mkey(struct devx_obj *obj) } static int devx_obj_cleanup(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; struct devx_obj *obj = uobject->object; @@ -1135,7 +1143,8 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, return ret; if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { - struct mlx5_ib_dev *dev = to_mdev(uobject->context->device); + struct mlx5_ib_dev *dev = + mlx5_udata_to_mdev(&attrs->driver_udata); call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu, devx_free_indirect_mkey); @@ -1260,7 +1269,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( if (!devx_is_obj_modify_cmd(cmd_in)) return -EINVAL; - if (!devx_is_valid_obj_id(uobj, cmd_in)) + if (!devx_is_valid_obj_id(attrs, uobj, cmd_in)) return -EINVAL; cmd_out = uverbs_zalloc(attrs, cmd_out_len); @@ -1302,7 +1311,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( if (!devx_is_obj_query_cmd(cmd_in)) return -EINVAL; - if (!devx_is_valid_obj_id(uobj, cmd_in)) + if (!devx_is_valid_obj_id(attrs, uobj, cmd_in)) return -EINVAL; cmd_out = uverbs_zalloc(attrs, cmd_out_len); @@ -1350,7 +1359,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)( struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE); - struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); ev_file = container_of(uobj, struct devx_async_cmd_event_file, uobj); @@ -1412,7 +1421,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)( if (err) return err; - if (!devx_is_valid_obj_id(uobj, cmd_in)) + if (!devx_is_valid_obj_id(attrs, uobj, cmd_in)) return -EINVAL; fd_uobj = uverbs_attr_get_uobject(attrs, @@ -1599,7 +1608,8 @@ err_obj_free: } static int devx_umem_cleanup(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct devx_umem *obj = uobject->object; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; @@ -1704,7 +1714,7 @@ static __poll_t devx_async_cmd_event_poll(struct file *filp, return pollflags; } -const struct file_operations devx_async_cmd_event_fops = { +static const struct file_operations devx_async_cmd_event_fops = { .owner = THIS_MODULE, .read = devx_async_cmd_event_read, .poll = devx_async_cmd_event_poll, @@ -1900,7 +1910,7 @@ static bool devx_is_supported(struct ib_device *device) { struct mlx5_ib_dev *dev = to_mdev(device); - return !dev->rep && MLX5_CAP_GEN(dev->mdev, log_max_uctx); + return MLX5_CAP_GEN(dev->mdev, log_max_uctx); } const struct uapi_definition mlx5_ib_devx_defs[] = { diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index 798591a18484..1fc302d41a53 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -29,6 +29,9 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type, case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX: *namespace = MLX5_FLOW_NAMESPACE_EGRESS; break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB: + *namespace = MLX5_FLOW_NAMESPACE_FDB; + break; default: return -EINVAL; } @@ -75,7 +78,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( struct ib_qp *qp = NULL; struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); - struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); + struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); int len, ret, i; u32 counter_id = 0; @@ -93,6 +96,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( ((dest_devx && dest_qp) || (!dest_devx && !dest_qp))) return -EINVAL; + /* Allow only DEVX object as dest when inserting to FDB */ + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !dest_devx) + return -EINVAL; + if (dest_devx) { devx_obj = uverbs_attr_get_obj( attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); @@ -104,6 +111,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( */ if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type)) return -EINVAL; + /* Allow only flow table as dest when inserting to FDB */ + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && + dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + return -EINVAL; } else if (dest_qp) { struct mlx5_ib_qp *mqp; @@ -189,7 +200,8 @@ err_out: } static int flow_matcher_cleanup(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct mlx5_ib_flow_matcher *obj = uobject->object; int ret; @@ -202,21 +214,67 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject, return 0; } +static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs, + struct mlx5_ib_flow_matcher *obj) +{ + enum mlx5_ib_uapi_flow_table_type ft_type = + MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX; + u32 flags; + int err; + + /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older + * users should switch to it. We leave this to not break userspace + */ + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) && + uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) + return -EINVAL; + + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) { + err = uverbs_get_const(&ft_type, attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE); + if (err) + return err; + + err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type); + if (err) + return err; + + return 0; + } + + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) { + err = uverbs_get_flags32(&flags, attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, + IB_FLOW_ATTR_FLAGS_EGRESS); + if (err) + return err; + + if (flags) { + mlx5_ib_ft_type_to_namespace( + MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, + &obj->ns_type); + return 0; + } + } + + obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS; + + return 0; +} + static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); - struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); + struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); struct mlx5_ib_flow_matcher *obj; - u32 flags; int err; obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL); if (!obj) return -ENOMEM; - obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS; obj->mask_len = uverbs_attr_get_len( attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); err = uverbs_copy_from(&obj->matcher_mask, @@ -242,19 +300,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( if (err) goto end; - err = uverbs_get_flags32(&flags, attrs, - MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, - IB_FLOW_ATTR_FLAGS_EGRESS); + err = mlx5_ib_matcher_ns(attrs, obj); if (err) goto end; - if (flags) { - err = mlx5_ib_ft_type_to_namespace( - MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, &obj->ns_type); - if (err) - goto end; - } - uobj->object = obj; obj->mdev = dev->mdev; atomic_set(&obj->usecnt, 0); @@ -326,7 +375,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE); - struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); enum mlx5_ib_uapi_flow_table_type ft_type; struct ib_flow_action *action; int num_actions; @@ -353,7 +402,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( if (IS_ERR(action)) return PTR_ERR(action); - uverbs_flow_action_fill_action(action, uobj, uobj->context->device, + uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev, IB_FLOW_ACTION_UNSPECIFIED); return 0; @@ -445,7 +494,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( { struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE); - struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt; enum mlx5_ib_uapi_flow_table_type ft_type; struct mlx5_ib_flow_action *maction; @@ -493,8 +542,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( goto free_maction; } - uverbs_flow_action_fill_action(&maction->ib_action, uobj, - uobj->context->device, + uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev, IB_FLOW_ACTION_UNSPECIFIED); return 0; @@ -605,6 +653,9 @@ DECLARE_UVERBS_NAMED_METHOD( UA_MANDATORY), UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, enum ib_flow_flags, + UA_OPTIONAL), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE, + enum mlx5_ib_uapi_flow_table_type, UA_OPTIONAL)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( @@ -619,15 +670,9 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE), &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY)); -static bool flow_is_supported(struct ib_device *device) -{ - return !to_mdev(device)->rep; -} - const struct uapi_definition mlx5_ib_flow_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED( - MLX5_IB_OBJECT_FLOW_MATCHER, - UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)), + MLX5_IB_OBJECT_FLOW_MATCHER), UAPI_DEF_CHAIN_OBJ_TREE( UVERBS_OBJECT_FLOW, &mlx5_ib_fs), diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index b8639ac71336..cbcc40d776b9 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -7,69 +7,59 @@ #include "ib_rep.h" #include "srq.h" -static const struct mlx5_ib_profile vf_rep_profile = { - STAGE_CREATE(MLX5_IB_STAGE_INIT, - mlx5_ib_stage_init_init, - mlx5_ib_stage_init_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, - mlx5_ib_stage_rep_flow_db_init, - NULL), - STAGE_CREATE(MLX5_IB_STAGE_CAPS, - mlx5_ib_stage_caps_init, - NULL), - STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, - mlx5_ib_stage_rep_non_default_cb, - NULL), - STAGE_CREATE(MLX5_IB_STAGE_ROCE, - mlx5_ib_stage_rep_roce_init, - mlx5_ib_stage_rep_roce_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_SRQ, - mlx5_init_srq_table, - mlx5_cleanup_srq_table), - STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, - mlx5_ib_stage_dev_res_init, - mlx5_ib_stage_dev_res_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, - mlx5_ib_stage_counters_init, - mlx5_ib_stage_counters_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_BFREG, - mlx5_ib_stage_bfrag_init, - mlx5_ib_stage_bfrag_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, - NULL, - mlx5_ib_stage_pre_ib_reg_umr_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_IB_REG, - mlx5_ib_stage_ib_reg_init, - mlx5_ib_stage_ib_reg_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, - mlx5_ib_stage_post_ib_reg_umr_init, - NULL), -}; +static int +mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +{ + struct mlx5_ib_dev *ibdev; + int vport_index; + + ibdev = mlx5_ib_get_uplink_ibdev(dev->priv.eswitch); + vport_index = ibdev->free_port++; + + ibdev->port[vport_index].rep = rep; + write_lock(&ibdev->port[vport_index].roce.netdev_lock); + ibdev->port[vport_index].roce.netdev = + mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport); + write_unlock(&ibdev->port[vport_index].roce.netdev_lock); + + return 0; +} static int mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { + int num_ports = MLX5_TOTAL_VPORTS(dev); const struct mlx5_ib_profile *profile; struct mlx5_ib_dev *ibdev; + int vport_index; if (rep->vport == MLX5_VPORT_UPLINK) profile = &uplink_rep_profile; else - profile = &vf_rep_profile; + return mlx5_ib_set_vport_rep(dev, rep); ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!ibdev) return -ENOMEM; - ibdev->rep = rep; - ibdev->mdev = dev; - ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports), - MLX5_CAP_GEN(dev, num_vhca_ports)); - if (!__mlx5_ib_add(ibdev, profile)) { + ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port), + GFP_KERNEL); + if (!ibdev->port) { ib_dealloc_device(&ibdev->ib_dev); - return -EINVAL; + return -ENOMEM; } + ibdev->is_rep = true; + vport_index = ibdev->free_port++; + ibdev->port[vport_index].rep = rep; + ibdev->port[vport_index].roce.netdev = + mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport); + ibdev->mdev = dev; + ibdev->num_ports = num_ports; + + if (!__mlx5_ib_add(ibdev, profile)) + return -EINVAL; + rep->rep_if[REP_IB].priv = ibdev; return 0; @@ -80,13 +70,13 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) { struct mlx5_ib_dev *dev; - if (!rep->rep_if[REP_IB].priv) + if (!rep->rep_if[REP_IB].priv || + rep->vport != MLX5_VPORT_UPLINK) return; dev = mlx5_ib_rep_to_dev(rep); __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); rep->rep_if[REP_IB].priv = NULL; - ib_dealloc_device(&dev->ib_dev); } static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) @@ -140,22 +130,21 @@ struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport) return mlx5_eswitch_vport_rep(esw, vport); } -int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, - struct mlx5_ib_sq *sq) +struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, + struct mlx5_ib_sq *sq, + u16 port) { - struct mlx5_flow_handle *flow_rule; struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep; - if (!dev->rep) - return 0; + if (!dev->is_rep || !port) + return NULL; - flow_rule = - mlx5_eswitch_add_send_to_vport_rule(esw, - dev->rep->vport, - sq->base.mqp.qpn); - if (IS_ERR(flow_rule)) - return PTR_ERR(flow_rule); - sq->flow_rule = flow_rule; + if (!dev->port[port - 1].rep) + return ERR_PTR(-EINVAL); - return 0; + rep = dev->port[port - 1].rep; + + return mlx5_eswitch_add_send_to_vport_rule(esw, rep->vport, + sq->base.mqp.qpn); } diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index 798d41e61fb4..1d9778da8a50 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -20,8 +20,9 @@ struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport_index); void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev); void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev); -int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, - struct mlx5_ib_sq *sq); +struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, + struct mlx5_ib_sq *sq, + u16 port); struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, int vport_index); #else /* CONFIG_MLX5_ESWITCH */ @@ -52,10 +53,12 @@ struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, static inline void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) {} static inline void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) {} -static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, - struct mlx5_ib_sq *sq) +static inline +struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, + struct mlx5_ib_sq *sq, + u16 port) { - return 0; + return NULL; } static inline diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 1aaa2056d188..abac70ad5c7c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -156,6 +156,34 @@ static int get_port_state(struct ib_device *ibdev, return ret; } +static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev, + struct net_device *ndev, + u8 *port_num) +{ + struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; + struct net_device *rep_ndev; + struct mlx5_ib_port *port; + int i; + + for (i = 0; i < dev->num_ports; i++) { + port = &dev->port[i]; + if (!port->rep) + continue; + + read_lock(&port->roce.netdev_lock); + rep_ndev = mlx5_ib_get_rep_netdev(esw, + port->rep->vport); + if (rep_ndev == ndev) { + read_unlock(&port->roce.netdev_lock); + *port_num = i + 1; + return &port->roce; + } + read_unlock(&port->roce.netdev_lock); + } + + return NULL; +} + static int mlx5_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -172,22 +200,17 @@ static int mlx5_netdev_event(struct notifier_block *this, switch (event) { case NETDEV_REGISTER: + /* Should already be registered during the load */ + if (ibdev->is_rep) + break; write_lock(&roce->netdev_lock); - if (ibdev->rep) { - struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch; - struct net_device *rep_ndev; - - rep_ndev = mlx5_ib_get_rep_netdev(esw, - ibdev->rep->vport); - if (rep_ndev == ndev) - roce->netdev = ndev; - } else if (ndev->dev.parent == mdev->device) { + if (ndev->dev.parent == mdev->device) roce->netdev = ndev; - } write_unlock(&roce->netdev_lock); break; case NETDEV_UNREGISTER: + /* In case of reps, ib device goes away before the netdevs */ write_lock(&roce->netdev_lock); if (roce->netdev == ndev) roce->netdev = NULL; @@ -205,6 +228,10 @@ static int mlx5_netdev_event(struct notifier_block *this, dev_put(lag_ndev); } + if (ibdev->is_rep) + roce = mlx5_get_rep_roce(ibdev, ndev, &port_num); + if (!roce) + return NOTIFY_DONE; if ((upper == ndev || (!upper && ndev == roce->netdev)) && ibdev->ib_active) { struct ib_event ibev = { }; @@ -257,11 +284,11 @@ static struct net_device *mlx5_ib_get_netdev(struct ib_device *device, /* Ensure ndev does not disappear before we invoke dev_hold() */ - read_lock(&ibdev->roce[port_num - 1].netdev_lock); - ndev = ibdev->roce[port_num - 1].netdev; + read_lock(&ibdev->port[port_num - 1].roce.netdev_lock); + ndev = ibdev->port[port_num - 1].roce.netdev; if (ndev) dev_hold(ndev); - read_unlock(&ibdev->roce[port_num - 1].netdev_lock); + read_unlock(&ibdev->port[port_num - 1].roce.netdev_lock); out: mlx5_ib_put_native_port_mdev(ibdev, port_num); @@ -479,9 +506,14 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, /* Possible bad flows are checked before filling out props so in case * of an error it will still be zeroed out. + * Use native port in case of reps */ - err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, - mdev_port_num); + if (dev->is_rep) + err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, + 1); + else + err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, + mdev_port_num); if (err) goto out; ext = MLX5_CAP_PCAM_FEATURE(dev->mdev, ptys_extended_ethernet); @@ -542,52 +574,22 @@ out: return err; } -struct mlx5_ib_vlan_info { - u16 vlan_id; - bool vlan; -}; - -static int get_lower_dev_vlan(struct net_device *lower_dev, void *data) -{ - struct mlx5_ib_vlan_info *vlan_info = data; - - if (is_vlan_dev(lower_dev)) { - vlan_info->vlan = true; - vlan_info->vlan_id = vlan_dev_vlan_id(lower_dev); - } - /* We are interested only in first level vlan device, so - * always return 1 to stop iterating over next level devices. - */ - return 1; -} - static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, unsigned int index, const union ib_gid *gid, const struct ib_gid_attr *attr) { enum ib_gid_type gid_type = IB_GID_TYPE_IB; - struct mlx5_ib_vlan_info vlan_info = { }; + u16 vlan_id = 0xffff; u8 roce_version = 0; u8 roce_l3_type = 0; u8 mac[ETH_ALEN]; + int ret; if (gid) { gid_type = attr->gid_type; - ether_addr_copy(mac, attr->ndev->dev_addr); - - if (is_vlan_dev(attr->ndev)) { - vlan_info.vlan = true; - vlan_info.vlan_id = vlan_dev_vlan_id(attr->ndev); - } else { - /* If the netdev is upper device and if it's lower - * lower device is vlan device, consider vlan id of - * the lower vlan device for this gid entry. - */ - rcu_read_lock(); - netdev_walk_all_lower_dev_rcu(attr->ndev, - get_lower_dev_vlan, &vlan_info); - rcu_read_unlock(); - } + ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]); + if (ret) + return ret; } switch (gid_type) { @@ -608,7 +610,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, return mlx5_core_roce_gid_set(dev->mdev, index, roce_version, roce_l3_type, gid->raw, mac, - vlan_info.vlan, vlan_info.vlan_id, + vlan_id < VLAN_CFI_MASK, vlan_id, port_num); } @@ -1407,7 +1409,9 @@ static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port, { int ret; - /* Only link layer == ethernet is valid for representors */ + /* Only link layer == ethernet is valid for representors + * and we always use port 1 + */ ret = mlx5_query_port_roce(ibdev, port, props); if (ret || !props) return ret; @@ -1954,11 +1958,11 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, print_lib_caps(dev, context->lib_caps); if (dev->lag_active) { - u8 port = mlx5_core_native_port_num(dev->mdev); + u8 port = mlx5_core_native_port_num(dev->mdev) - 1; atomic_set(&context->tx_port_affinity, atomic_add_return( - 1, &dev->roce[port].tx_port_affinity)); + 1, &dev->port[port].roce.tx_port_affinity)); } return 0; @@ -2060,21 +2064,22 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, struct vm_area_struct *vma, struct mlx5_ib_ucontext *context) { - if (vma->vm_end - vma->vm_start != PAGE_SIZE) + if ((vma->vm_end - vma->vm_start != PAGE_SIZE) || + !(vma->vm_flags & VM_SHARED)) return -EINVAL; if (get_index(vma->vm_pgoff) != MLX5_IB_CLOCK_INFO_V1) return -EOPNOTSUPP; - if (vma->vm_flags & VM_WRITE) + if (vma->vm_flags & (VM_WRITE | VM_EXEC)) return -EPERM; vma->vm_flags &= ~VM_MAYWRITE; - if (!dev->mdev->clock_info_page) + if (!dev->mdev->clock_info) return -EOPNOTSUPP; - return rdma_user_mmap_page(&context->ibucontext, vma, - dev->mdev->clock_info_page, PAGE_SIZE); + return vm_insert_page(vma, vma->vm_start, + virt_to_page(dev->mdev->clock_info)); } static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, @@ -2259,89 +2264,200 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm return 0; } -struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_dm_alloc_attr *attr, - struct uverbs_attr_bundle *attrs) +static inline int check_dm_type_support(struct mlx5_ib_dev *dev, + u32 type) { - u64 act_size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE); - struct mlx5_memic *memic = &to_mdev(ibdev)->memic; - phys_addr_t memic_addr; - struct mlx5_ib_dm *dm; + switch (type) { + case MLX5_IB_UAPI_DM_TYPE_MEMIC: + if (!MLX5_CAP_DEV_MEM(dev->mdev, memic)) + return -EOPNOTSUPP; + break; + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + if (!capable(CAP_SYS_RAWIO) || + !capable(CAP_NET_RAW)) + return -EPERM; + + if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) || + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner))) + return -EOPNOTSUPP; + break; + } + + return 0; +} + +static int handle_alloc_dm_memic(struct ib_ucontext *ctx, + struct mlx5_ib_dm *dm, + struct ib_dm_alloc_attr *attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm; u64 start_offset; u32 page_idx; int err; - dm = kzalloc(sizeof(*dm), GFP_KERNEL); - if (!dm) - return ERR_PTR(-ENOMEM); - - mlx5_ib_dbg(to_mdev(ibdev), "alloc_memic req: user_length=0x%llx act_length=0x%llx log_alignment=%d\n", - attr->length, act_size, attr->alignment); + dm->size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE); - err = mlx5_cmd_alloc_memic(memic, &memic_addr, - act_size, attr->alignment); + err = mlx5_cmd_alloc_memic(dm_db, &dm->dev_addr, + dm->size, attr->alignment); if (err) - goto err_free; + return err; - start_offset = memic_addr & ~PAGE_MASK; - page_idx = (memic_addr - memic->dev->bar_addr - - MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >> + page_idx = (dm->dev_addr - pci_resource_start(dm_db->dev->pdev, 0) - + MLX5_CAP64_DEV_MEM(dm_db->dev, memic_bar_start_addr)) >> PAGE_SHIFT; err = uverbs_copy_to(attrs, + MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, + &page_idx, sizeof(page_idx)); + if (err) + goto err_dealloc; + + start_offset = dm->dev_addr & ~PAGE_MASK; + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, &start_offset, sizeof(start_offset)); if (err) goto err_dealloc; + bitmap_set(to_mucontext(ctx)->dm_pages, page_idx, + DIV_ROUND_UP(dm->size, PAGE_SIZE)); + + return 0; + +err_dealloc: + mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size); + + return err; +} + +static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx, + struct mlx5_ib_dm *dm, + struct ib_dm_alloc_attr *attr, + struct uverbs_attr_bundle *attrs, + int type) +{ + struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm; + u64 act_size; + int err; + + /* Allocation size must a multiple of the basic block size + * and a power of 2. + */ + act_size = roundup(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev)); + act_size = roundup_pow_of_two(act_size); + + dm->size = act_size; + err = mlx5_cmd_alloc_sw_icm(dm_db, type, act_size, + to_mucontext(ctx)->devx_uid, &dm->dev_addr, + &dm->icm_dm.obj_id); + if (err) + return err; + err = uverbs_copy_to(attrs, - MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, - &page_idx, sizeof(page_idx)); + MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, + &dm->dev_addr, sizeof(dm->dev_addr)); if (err) - goto err_dealloc; + mlx5_cmd_dealloc_sw_icm(dm_db, type, dm->size, + to_mucontext(ctx)->devx_uid, + dm->dev_addr, dm->icm_dm.obj_id); + + return err; +} + +struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_dm_alloc_attr *attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_dm *dm; + enum mlx5_ib_uapi_dm_type type; + int err; - bitmap_set(to_mucontext(context)->dm_pages, page_idx, - DIV_ROUND_UP(act_size, PAGE_SIZE)); + err = uverbs_get_const_default(&type, attrs, + MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE, + MLX5_IB_UAPI_DM_TYPE_MEMIC); + if (err) + return ERR_PTR(err); - dm->dev_addr = memic_addr; + mlx5_ib_dbg(to_mdev(ibdev), "alloc_dm req: dm_type=%d user_length=0x%llx log_alignment=%d\n", + type, attr->length, attr->alignment); + + err = check_dm_type_support(to_mdev(ibdev), type); + if (err) + return ERR_PTR(err); + + dm = kzalloc(sizeof(*dm), GFP_KERNEL); + if (!dm) + return ERR_PTR(-ENOMEM); + + dm->type = type; + + switch (type) { + case MLX5_IB_UAPI_DM_TYPE_MEMIC: + err = handle_alloc_dm_memic(context, dm, + attr, + attrs); + break; + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + err = handle_alloc_dm_sw_icm(context, dm, attr, attrs, type); + break; + default: + err = -EOPNOTSUPP; + } + + if (err) + goto err_free; return &dm->ibdm; -err_dealloc: - mlx5_cmd_dealloc_memic(memic, memic_addr, - act_size); err_free: kfree(dm); return ERR_PTR(err); } -int mlx5_ib_dealloc_dm(struct ib_dm *ibdm) +int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs) { - struct mlx5_memic *memic = &to_mdev(ibdm->device)->memic; + struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context( + &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); + struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm; struct mlx5_ib_dm *dm = to_mdm(ibdm); - u64 act_size = roundup(dm->ibdm.length, MLX5_MEMIC_BASE_SIZE); u32 page_idx; int ret; - ret = mlx5_cmd_dealloc_memic(memic, dm->dev_addr, act_size); - if (ret) - return ret; + switch (dm->type) { + case MLX5_IB_UAPI_DM_TYPE_MEMIC: + ret = mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size); + if (ret) + return ret; - page_idx = (dm->dev_addr - memic->dev->bar_addr - - MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >> - PAGE_SHIFT; - bitmap_clear(to_mucontext(ibdm->uobject->context)->dm_pages, - page_idx, - DIV_ROUND_UP(act_size, PAGE_SIZE)); + page_idx = (dm->dev_addr - + pci_resource_start(dm_db->dev->pdev, 0) - + MLX5_CAP64_DEV_MEM(dm_db->dev, + memic_bar_start_addr)) >> + PAGE_SHIFT; + bitmap_clear(ctx->dm_pages, page_idx, + DIV_ROUND_UP(dm->size, PAGE_SIZE)); + break; + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + ret = mlx5_cmd_dealloc_sw_icm(dm_db, dm->type, dm->size, + ctx->devx_uid, dm->dev_addr, + dm->icm_dm.obj_id); + if (ret) + return ret; + break; + default: + return -EOPNOTSUPP; + } kfree(dm); return 0; } -static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct mlx5_ib_pd *pd = to_mpd(ibpd); struct ib_device *ibdev = ibpd->device; @@ -2350,8 +2466,10 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {}; u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; u16 uid = 0; + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); - uid = context ? to_mucontext(context)->devx_uid : 0; + uid = context ? context->devx_uid : 0; MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); MLX5_SET(alloc_pd_in, in, uid, uid); err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in), @@ -2361,7 +2479,7 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, pd->pdn = MLX5_GET(alloc_pd_out, out, pd); pd->uid = uid; - if (context) { + if (udata) { resp.pdn = pd->pdn; if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { mlx5_cmd_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid); @@ -2372,7 +2490,7 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, return 0; } -static void mlx5_ib_dealloc_pd(struct ib_pd *pd) +static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct mlx5_ib_dev *mdev = to_mdev(pd->device); struct mlx5_ib_pd *mpd = to_mpd(pd); @@ -3151,10 +3269,10 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, if (ft_type == MLX5_IB_FT_RX) { fn_type = MLX5_FLOW_NAMESPACE_BYPASS; prio = &dev->flow_db->prios[priority]; - if (!dev->rep && + if (!dev->is_rep && MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (!dev->rep && + if (!dev->is_rep && MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, reformat_l3_tunnel_to_l2)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; @@ -3164,7 +3282,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, log_max_ft_size)); fn_type = MLX5_FLOW_NAMESPACE_EGRESS; prio = &dev->flow_db->egress_prios[priority]; - if (!dev->rep && + if (!dev->is_rep && MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; } @@ -3197,12 +3315,11 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, if (!ns) return ERR_PTR(-ENOTSUPP); - if (num_entries > max_table_size) - return ERR_PTR(-ENOMEM); + max_table_size = min_t(int, num_entries, max_table_size); ft = prio->flow_table; if (!ft) - return _get_prio(ns, prio, priority, num_entries, num_groups, + return _get_prio(ns, prio, priority, max_table_size, num_groups, flags); return prio; @@ -3370,7 +3487,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, if (!is_valid_attr(dev->mdev, flow_attr)) return ERR_PTR(-EINVAL); - if (dev->rep && is_egress) + if (dev->is_rep && is_egress) return ERR_PTR(-EINVAL); spec = kvzalloc(sizeof(*spec), GFP_KERNEL); @@ -3401,13 +3518,17 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, if (!flow_is_multicast_only(flow_attr)) set_underlay_qp(dev, spec, underlay_qpn); - if (dev->rep) { + if (dev->is_rep) { void *misc; + if (!dev->port[flow_attr->port - 1].rep) { + err = -EINVAL; + goto free; + } misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_port, - dev->rep->vport); + dev->port[flow_attr->port - 1].rep->vport); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); @@ -3769,11 +3890,16 @@ _get_flow_table(struct mlx5_ib_dev *dev, bool mcast) { struct mlx5_flow_namespace *ns = NULL; - struct mlx5_ib_flow_prio *prio; - int max_table_size; + struct mlx5_ib_flow_prio *prio = NULL; + int max_table_size = 0; u32 flags = 0; int priority; + if (mcast) + priority = MLX5_IB_FLOW_MCAST_PRIO; + else + priority = ib_prio_to_core_prio(fs_matcher->priority, false); + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size)); @@ -3782,20 +3908,18 @@ _get_flow_table(struct mlx5_ib_dev *dev, if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, reformat_l3_tunnel_to_l2)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else { /* Can only be MLX5_FLOW_NAMESPACE_EGRESS */ - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, - log_max_ft_size)); + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { + max_table_size = BIT( + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size)); if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) { + max_table_size = BIT( + MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size)); + priority = FDB_BYPASS_PATH; } - if (max_table_size < MLX5_FS_MAX_ENTRIES) - return ERR_PTR(-ENOMEM); - - if (mcast) - priority = MLX5_IB_FLOW_MCAST_PRIO; - else - priority = ib_prio_to_core_prio(fs_matcher->priority, false); + max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type); if (!ns) @@ -3803,13 +3927,18 @@ _get_flow_table(struct mlx5_ib_dev *dev, if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) prio = &dev->flow_db->prios[priority]; - else + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) prio = &dev->flow_db->egress_prios[priority]; + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) + prio = &dev->flow_db->fdb; + + if (!prio) + return ERR_PTR(-EINVAL); if (prio->flow_table) return prio; - return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES, + return _get_prio(ns, prio, priority, max_table_size, MLX5_FS_MAX_TYPES, flags); } @@ -4509,7 +4638,7 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) int err; int port; - for (port = 1; port <= dev->num_ports; port++) { + for (port = 1; port <= ARRAY_SIZE(dev->mdev->port_caps); port++) { dev->mdev->port_caps[port - 1].has_smi = false; if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) { @@ -4540,7 +4669,7 @@ static void get_ext_port_caps(struct mlx5_ib_dev *dev) mlx5_query_ext_port_caps(dev, port); } -static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) +static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) { struct ib_device_attr *dprops = NULL; struct ib_port_attr *pprops = NULL; @@ -4555,10 +4684,6 @@ static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) if (!dprops) goto out; - err = set_has_smi_cap(dev); - if (err) - goto out; - err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw); if (err) { mlx5_ib_warn(dev, "query_device failed %d\n", err); @@ -4587,6 +4712,16 @@ out: return err; } +static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) +{ + /* For representors use port 1, is this is the only native + * port + */ + if (dev->is_rep) + return __get_port_caps(dev, 1); + return __get_port_caps(dev, port); +} + static void destroy_umrc_res(struct mlx5_ib_dev *dev) { int err; @@ -4596,7 +4731,7 @@ static void destroy_umrc_res(struct mlx5_ib_dev *dev) mlx5_ib_warn(dev, "mr cache cleanup failed\n"); if (dev->umrc.qp) - mlx5_ib_destroy_qp(dev->umrc.qp); + mlx5_ib_destroy_qp(dev->umrc.qp, NULL); if (dev->umrc.cq) ib_free_cq(dev->umrc.cq); if (dev->umrc.pd) @@ -4701,7 +4836,7 @@ static int create_umr_res(struct mlx5_ib_dev *dev) return 0; error_4: - mlx5_ib_destroy_qp(qp); + mlx5_ib_destroy_qp(qp, NULL); dev->umrc.qp = NULL; error_3: @@ -4752,11 +4887,11 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) devr->p0->uobject = NULL; atomic_set(&devr->p0->usecnt, 0); - ret = mlx5_ib_alloc_pd(devr->p0, NULL, NULL); + ret = mlx5_ib_alloc_pd(devr->p0, NULL); if (ret) goto error0; - devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL); + devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL); if (IS_ERR(devr->c0)) { ret = PTR_ERR(devr->c0); goto error1; @@ -4768,7 +4903,7 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) devr->c0->cq_context = NULL; atomic_set(&devr->c0->usecnt, 0); - devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); + devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); if (IS_ERR(devr->x0)) { ret = PTR_ERR(devr->x0); goto error2; @@ -4779,7 +4914,7 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) mutex_init(&devr->x0->tgt_qp_mutex); INIT_LIST_HEAD(&devr->x0->tgt_qp_list); - devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); + devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); if (IS_ERR(devr->x1)) { ret = PTR_ERR(devr->x1); goto error3; @@ -4797,19 +4932,21 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) attr.ext.cq = devr->c0; attr.ext.xrc.xrcd = devr->x0; - devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL); - if (IS_ERR(devr->s0)) { - ret = PTR_ERR(devr->s0); + devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq); + if (!devr->s0) { + ret = -ENOMEM; goto error4; } + devr->s0->device = &dev->ib_dev; devr->s0->pd = devr->p0; - devr->s0->uobject = NULL; - devr->s0->event_handler = NULL; - devr->s0->srq_context = NULL; devr->s0->srq_type = IB_SRQT_XRC; devr->s0->ext.xrc.xrcd = devr->x0; devr->s0->ext.cq = devr->c0; + ret = mlx5_ib_create_srq(devr->s0, &attr, NULL); + if (ret) + goto err_create; + atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt); atomic_inc(&devr->s0->ext.cq->usecnt); atomic_inc(&devr->p0->usecnt); @@ -4819,18 +4956,21 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) attr.attr.max_sge = 1; attr.attr.max_wr = 1; attr.srq_type = IB_SRQT_BASIC; - devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL); - if (IS_ERR(devr->s1)) { - ret = PTR_ERR(devr->s1); + devr->s1 = rdma_zalloc_drv_obj(ibdev, ib_srq); + if (!devr->s1) { + ret = -ENOMEM; goto error5; } + devr->s1->device = &dev->ib_dev; devr->s1->pd = devr->p0; - devr->s1->uobject = NULL; - devr->s1->event_handler = NULL; - devr->s1->srq_context = NULL; devr->s1->srq_type = IB_SRQT_BASIC; devr->s1->ext.cq = devr->c0; + + ret = mlx5_ib_create_srq(devr->s1, &attr, NULL); + if (ret) + goto error6; + atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s1->usecnt, 0); @@ -4842,16 +4982,20 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) return 0; +error6: + kfree(devr->s1); error5: - mlx5_ib_destroy_srq(devr->s0); + mlx5_ib_destroy_srq(devr->s0, NULL); +err_create: + kfree(devr->s0); error4: - mlx5_ib_dealloc_xrcd(devr->x1); + mlx5_ib_dealloc_xrcd(devr->x1, NULL); error3: - mlx5_ib_dealloc_xrcd(devr->x0); + mlx5_ib_dealloc_xrcd(devr->x0, NULL); error2: - mlx5_ib_destroy_cq(devr->c0); + mlx5_ib_destroy_cq(devr->c0, NULL); error1: - mlx5_ib_dealloc_pd(devr->p0); + mlx5_ib_dealloc_pd(devr->p0, NULL); error0: kfree(devr->p0); return ret; @@ -4859,20 +5003,20 @@ error0: static void destroy_dev_resources(struct mlx5_ib_resources *devr) { - struct mlx5_ib_dev *dev = - container_of(devr, struct mlx5_ib_dev, devr); int port; - mlx5_ib_destroy_srq(devr->s1); - mlx5_ib_destroy_srq(devr->s0); - mlx5_ib_dealloc_xrcd(devr->x0); - mlx5_ib_dealloc_xrcd(devr->x1); - mlx5_ib_destroy_cq(devr->c0); - mlx5_ib_dealloc_pd(devr->p0); + mlx5_ib_destroy_srq(devr->s1, NULL); + kfree(devr->s1); + mlx5_ib_destroy_srq(devr->s0, NULL); + kfree(devr->s0); + mlx5_ib_dealloc_xrcd(devr->x0, NULL); + mlx5_ib_dealloc_xrcd(devr->x1, NULL); + mlx5_ib_destroy_cq(devr->c0, NULL); + mlx5_ib_dealloc_pd(devr->p0, NULL); kfree(devr->p0); /* Make sure no change P_Key work items are still executing */ - for (port = 0; port < dev->num_ports; ++port) + for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) cancel_work_sync(&devr->ports[port].pkey_change_work); } @@ -5015,10 +5159,10 @@ static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) { int err; - dev->roce[port_num].nb.notifier_call = mlx5_netdev_event; - err = register_netdevice_notifier(&dev->roce[port_num].nb); + dev->port[port_num].roce.nb.notifier_call = mlx5_netdev_event; + err = register_netdevice_notifier(&dev->port[port_num].roce.nb); if (err) { - dev->roce[port_num].nb.notifier_call = NULL; + dev->port[port_num].roce.nb.notifier_call = NULL; return err; } @@ -5027,9 +5171,9 @@ static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) { - if (dev->roce[port_num].nb.notifier_call) { - unregister_netdevice_notifier(&dev->roce[port_num].nb); - dev->roce[port_num].nb.notifier_call = NULL; + if (dev->port[port_num].roce.nb.notifier_call) { + unregister_netdevice_notifier(&dev->port[port_num].roce.nb); + dev->port[port_num].roce.nb.notifier_call = NULL; } } @@ -5578,7 +5722,7 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, mlx5_ib_err(ibdev, "Failed to unaffiliate port %u\n", port_num + 1); - ibdev->roce[port_num].last_port_state = IB_PORT_DOWN; + ibdev->port[port_num].roce.last_port_state = IB_PORT_DOWN; } /* The mlx5_ib_multiport_mutex should be held when calling this function */ @@ -5738,7 +5882,10 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( UA_MANDATORY), UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, UVERBS_ATTR_TYPE(u16), - UA_MANDATORY)); + UA_OPTIONAL), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE, + enum mlx5_ib_uapi_dm_type, + UA_OPTIONAL)); ADD_UVERBS_ATTRIBUTES_SIMPLE( mlx5_ib_flow_action, @@ -5829,35 +5976,58 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, return &mcounters->ibcntrs; } -void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { + struct mlx5_core_dev *mdev = dev->mdev; + mlx5_ib_cleanup_multiport_master(dev); if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { srcu_barrier(&dev->mr_srcu); cleanup_srcu_struct(&dev->mr_srcu); } - kfree(dev->port); + + WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); + + WARN_ON(dev->dm.steering_sw_icm_alloc_blocks && + !bitmap_empty( + dev->dm.steering_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)))); + + kfree(dev->dm.steering_sw_icm_alloc_blocks); + + WARN_ON(dev->dm.header_modify_sw_icm_alloc_blocks && + !bitmap_empty(dev->dm.header_modify_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM( + mdev, log_header_modify_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)))); + + kfree(dev->dm.header_modify_sw_icm_alloc_blocks); } -int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; + u64 header_modify_icm_blocks = 0; + u64 steering_icm_blocks = 0; int err; int i; - dev->port = kcalloc(dev->num_ports, sizeof(*dev->port), - GFP_KERNEL); - if (!dev->port) - return -ENOMEM; - for (i = 0; i < dev->num_ports; i++) { spin_lock_init(&dev->port[i].mp.mpi_lock); - rwlock_init(&dev->roce[i].netdev_lock); + rwlock_init(&dev->port[i].roce.netdev_lock); + dev->port[i].roce.dev = dev; + dev->port[i].roce.native_port_num = i + 1; + dev->port[i].roce.last_port_state = IB_PORT_DOWN; } err = mlx5_ib_init_multiport_master(dev); if (err) - goto err_free_port; + return err; + + err = set_has_smi_cap(dev); + if (err) + return err; if (!mlx5_core_mp_enabled(mdev)) { for (i = 1; i <= dev->num_ports; i++) { @@ -5885,22 +6055,54 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); - spin_lock_init(&dev->memic.memic_lock); - dev->memic.dev = mdev; + if (MLX5_CAP_GEN_64(mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) { + if (MLX5_CAP64_DEV_MEM(mdev, steering_sw_icm_start_address)) { + steering_icm_blocks = + BIT(MLX5_CAP_DEV_MEM(mdev, + log_steering_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)); + + dev->dm.steering_sw_icm_alloc_blocks = + kcalloc(BITS_TO_LONGS(steering_icm_blocks), + sizeof(unsigned long), GFP_KERNEL); + if (!dev->dm.steering_sw_icm_alloc_blocks) + goto err_mp; + } + + if (MLX5_CAP64_DEV_MEM(mdev, + header_modify_sw_icm_start_address)) { + header_modify_icm_blocks = BIT( + MLX5_CAP_DEV_MEM( + mdev, log_header_modify_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)); + + dev->dm.header_modify_sw_icm_alloc_blocks = + kcalloc(BITS_TO_LONGS(header_modify_icm_blocks), + sizeof(unsigned long), GFP_KERNEL); + if (!dev->dm.header_modify_sw_icm_alloc_blocks) + goto err_dm; + } + } + + spin_lock_init(&dev->dm.lock); + dev->dm.dev = mdev; if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { err = init_srcu_struct(&dev->mr_srcu); if (err) - goto err_mp; + goto err_dm; } return 0; + +err_dm: + kfree(dev->dm.steering_sw_icm_alloc_blocks); + kfree(dev->dm.header_modify_sw_icm_alloc_blocks); + err_mp: mlx5_ib_cleanup_multiport_master(dev); -err_free_port: - kfree(dev->port); - return -ENOMEM; } @@ -5916,20 +6118,6 @@ static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev) return 0; } -int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev) -{ - struct mlx5_ib_dev *nic_dev; - - nic_dev = mlx5_ib_get_uplink_ibdev(dev->mdev->priv.eswitch); - - if (!nic_dev) - return -EINVAL; - - dev->flow_db = nic_dev->flow_db; - - return 0; -} - static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev) { kfree(dev->flow_db); @@ -5989,7 +6177,10 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .req_notify_cq = mlx5_ib_arm_cq, .rereg_user_mr = mlx5_ib_rereg_user_mr, .resize_cq = mlx5_ib_resize_cq, + + INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext), }; @@ -6025,7 +6216,7 @@ static const struct ib_device_ops mlx5_ib_dev_dm_ops = { .reg_dm_mr = mlx5_ib_reg_dm_mr, }; -int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; int err; @@ -6091,7 +6282,9 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops); } - if (MLX5_CAP_DEV_MEM(mdev, memic)) + if (MLX5_CAP_DEV_MEM(mdev, memic) || + MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops); if (mlx5_accel_ipsec_device_caps(dev->mdev) & @@ -6131,7 +6324,7 @@ static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = { .query_port = mlx5_ib_rep_query_port, }; -int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev) { ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_rep_ops); return 0; @@ -6149,13 +6342,6 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = { static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) { u8 port_num; - int i; - - for (i = 0; i < dev->num_ports; i++) { - dev->roce[i].dev = dev; - dev->roce[i].native_port_num = i + 1; - dev->roce[i].last_port_state = IB_PORT_DOWN; - } dev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | @@ -6167,6 +6353,7 @@ static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) port_num = mlx5_core_native_port_num(dev->mdev) - 1; + /* Register only for native ports */ return mlx5_add_netdev_notifier(dev, port_num); } @@ -6177,7 +6364,7 @@ static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev) mlx5_remove_netdev_notifier(dev, port_num); } -int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; @@ -6193,7 +6380,7 @@ int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev) return err; } -void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_stage_common_roce_cleanup(dev); } @@ -6240,12 +6427,12 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev) } } -int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev) { return create_dev_resources(&dev->devr); } -void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev) { destroy_dev_resources(&dev->devr); } @@ -6267,7 +6454,7 @@ static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = { .get_hw_stats = mlx5_ib_get_hw_stats, }; -int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops); @@ -6278,7 +6465,7 @@ int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) return 0; } -void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) mlx5_ib_dealloc_counters(dev); @@ -6308,7 +6495,7 @@ static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev) mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar); } -int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) { int err; @@ -6323,13 +6510,13 @@ int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) return err; } -void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev) { mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); mlx5_free_bfreg(dev->mdev, &dev->bfreg); } -int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) { const char *name; @@ -6341,17 +6528,17 @@ int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) return ib_register_device(&dev->ib_dev, name); } -void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) { destroy_umrc_res(dev); } -void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) { ib_unregister_device(&dev->ib_dev); } -int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) { return create_umr_res(dev); } @@ -6406,6 +6593,9 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, if (profile->stage[stage].cleanup) profile->stage[stage].cleanup(dev); } + + kfree(dev->port); + ib_dealloc_device(&dev->ib_dev); } void *__mlx5_ib_add(struct mlx5_ib_dev *dev, @@ -6527,6 +6717,9 @@ const struct mlx5_ib_profile uplink_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, + mlx5_ib_stage_devx_init, + mlx5_ib_stage_devx_cleanup), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), @@ -6581,12 +6774,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) enum rdma_link_layer ll; struct mlx5_ib_dev *dev; int port_type_cap; + int num_ports; printk_once(KERN_INFO "%s", mlx5_version); if (MLX5_ESWITCH_MANAGER(mdev) && mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { - mlx5_ib_register_vport_reps(mdev); + if (!mlx5_core_mp_enabled(mdev)) + mlx5_ib_register_vport_reps(mdev); return mdev; } @@ -6596,13 +6791,20 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) return mlx5_ib_add_slave_port(mdev); + num_ports = max(MLX5_CAP_GEN(mdev, num_ports), + MLX5_CAP_GEN(mdev, num_vhca_ports)); dev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!dev) return NULL; + dev->port = kcalloc(num_ports, sizeof(*dev->port), + GFP_KERNEL); + if (!dev->port) { + ib_dealloc_device((struct ib_device *)dev); + return NULL; + } dev->mdev = mdev; - dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports), - MLX5_CAP_GEN(mdev, num_vhca_ports)); + dev->num_ports = num_ports; return __mlx5_ib_add(dev, &pf_profile); } @@ -6629,8 +6831,6 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) dev = context; __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); - - ib_dealloc_device((struct ib_device *)dev); } static struct mlx5_interface mlx5_ib_interface = { diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 4a617d78eae1..40eb8be482e4 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -48,6 +48,7 @@ #include <rdma/mlx5-abi.h> #include <rdma/uverbs_ioctl.h> #include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/mlx5_user_ioctl_verbs.h> #include "srq.h" @@ -117,6 +118,10 @@ enum { MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN, }; +#define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) \ + (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity)) +#define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) + struct mlx5_ib_ucontext { struct ib_ucontext ibucontext; struct list_head db_page_list; @@ -194,6 +199,7 @@ struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS]; struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS]; + struct mlx5_ib_flow_prio fdb; struct mlx5_flow_table *lag_demux_ft; /* Protect flow steering bypass flow tables * when add/del flow rules. @@ -553,15 +559,28 @@ enum mlx5_ib_mtt_access_flags { struct mlx5_ib_dm { struct ib_dm ibdm; phys_addr_t dev_addr; + u32 type; + size_t size; + union { + struct { + u32 obj_id; + } icm_dm; + /* other dm types specific params should be added here */ + }; }; #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE) -#define MLX5_IB_DM_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE |\ - IB_ACCESS_REMOTE_WRITE |\ - IB_ACCESS_REMOTE_READ |\ - IB_ACCESS_REMOTE_ATOMIC |\ - IB_ZERO_BASED) +#define MLX5_IB_DM_MEMIC_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE |\ + IB_ACCESS_REMOTE_WRITE |\ + IB_ACCESS_REMOTE_READ |\ + IB_ACCESS_REMOTE_ATOMIC |\ + IB_ZERO_BASED) + +#define MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE |\ + IB_ACCESS_REMOTE_WRITE |\ + IB_ACCESS_REMOTE_READ |\ + IB_ZERO_BASED) struct mlx5_ib_mr { struct ib_mr ibmr; @@ -702,12 +721,6 @@ struct mlx5_ib_multiport { spinlock_t mpi_lock; }; -struct mlx5_ib_port { - struct mlx5_ib_counters cnts; - struct mlx5_ib_multiport mp; - struct mlx5_ib_dbg_cc_params *dbg_cc_params; -}; - struct mlx5_roce { /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL * netdev pointer @@ -721,6 +734,14 @@ struct mlx5_roce { u8 native_port_num; }; +struct mlx5_ib_port { + struct mlx5_ib_counters cnts; + struct mlx5_ib_multiport mp; + struct mlx5_ib_dbg_cc_params *dbg_cc_params; + struct mlx5_roce roce; + struct mlx5_eswitch_rep *rep; +}; + struct mlx5_ib_dbg_param { int offset; struct mlx5_ib_dev *dev; @@ -840,10 +861,16 @@ struct mlx5_ib_flow_action { }; }; -struct mlx5_memic { +struct mlx5_dm { struct mlx5_core_dev *dev; - spinlock_t memic_lock; + /* This lock is used to protect the access to the shared + * allocation map when concurrent requests by different + * processes are handled. + */ + spinlock_t lock; DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES); + unsigned long *steering_sw_icm_alloc_blocks; + unsigned long *header_modify_sw_icm_alloc_blocks; }; struct mlx5_read_counters_attr { @@ -905,7 +932,6 @@ struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; struct notifier_block mdev_events; - struct mlx5_roce roce[MLX5_MAX_PORTS]; int num_ports; /* serialize update of capability mask */ @@ -940,17 +966,18 @@ struct mlx5_ib_dev { struct mlx5_sq_bfreg fp_bfreg; struct mlx5_ib_delay_drop delay_drop; const struct mlx5_ib_profile *profile; - struct mlx5_eswitch_rep *rep; + bool is_rep; int lag_active; struct mlx5_ib_lb_state lb; u8 umr_fence; struct list_head ib_dev_list; u64 sys_image_guid; - struct mlx5_memic memic; + struct mlx5_dm dm; u16 devx_whitelist_uid; struct mlx5_srq_table srq_table; struct mlx5_async_ctx async_ctx; + int free_port; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -968,6 +995,14 @@ static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev) return container_of(ibdev, struct mlx5_ib_dev, ib_dev); } +static inline struct mlx5_ib_dev *mlx5_udata_to_mdev(struct ib_udata *udata) +{ + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); + + return to_mdev(context->ibucontext.device); +} + static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq) { return container_of(ibcq, struct mlx5_ib_cq, ibcq); @@ -1046,17 +1081,16 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db) void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); -struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata); +int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, + struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags); -struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata); +void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags); +int mlx5_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr); -int mlx5_ib_destroy_srq(struct ib_srq *srq); +void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp); @@ -1068,7 +1102,7 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); -int mlx5_ib_destroy_qp(struct ib_qp *qp); +int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata); void mlx5_ib_drain_sq(struct ib_qp *qp); void mlx5_ib_drain_rq(struct ib_qp *qp); int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, @@ -1083,9 +1117,8 @@ int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer, int buflen, size_t *bc); struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata); -int mlx5_ib_destroy_cq(struct ib_cq *cq); +int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); @@ -1112,10 +1145,9 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_pd *pd, struct ib_udata *udata); -int mlx5_ib_dereg_mr(struct ib_mr *ibmr); -struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg); +int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); +struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata); int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, @@ -1124,9 +1156,8 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_mad_hdr *out, size_t *out_mad_size, u16 *out_mad_pkey_index); struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata); -int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd); + struct ib_udata *udata); +int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, @@ -1170,7 +1201,7 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); -int mlx5_ib_destroy_wq(struct ib_wq *wq); +int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, @@ -1182,7 +1213,7 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_dm_alloc_attr *attr, struct uverbs_attr_bundle *attrs); -int mlx5_ib_dealloc_dm(struct ib_dm *ibdm); +int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs); struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, struct ib_dm_mr_attr *attr, struct uverbs_attr_bundle *attrs); @@ -1230,23 +1261,6 @@ static inline void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ /* Needed for rep profile */ -int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev); void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index ca921fd40499..5f09699fab98 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -600,7 +600,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) { - if (!mlx5_debugfs_root || dev->rep) + if (!mlx5_debugfs_root || dev->is_rep) return; debugfs_remove_recursive(dev->cache.root); @@ -614,7 +614,7 @@ static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) struct dentry *dir; int i; - if (!mlx5_debugfs_root || dev->rep) + if (!mlx5_debugfs_root || dev->is_rep) return; cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root); @@ -677,7 +677,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) MLX5_IB_UMR_OCTOWORD; ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && - !dev->rep && + !dev->is_rep && mlx5_core_is_pf(dev->mdev)) ent->limit = dev->mdev->profile->mr_cache[i].limit; else @@ -1159,8 +1159,8 @@ static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, mr->access_flags = access_flags; } -static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr, - u64 length, int acc) +static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, + u64 length, int acc, int mode) { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); @@ -1182,9 +1182,8 @@ static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr, mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MEMIC & 0x3); - MLX5_SET(mkc, mkc, access_mode_4_2, - (MLX5_MKC_ACCESS_MODE_MEMIC >> 2) & 0x7); + MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); @@ -1194,7 +1193,7 @@ static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr, MLX5_SET64(mkc, mkc, len, length); MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET64(mkc, mkc, start_addr, memic_addr - dev->mdev->bar_addr); + MLX5_SET64(mkc, mkc, start_addr, start_addr); err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); if (err) @@ -1236,15 +1235,31 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, struct uverbs_attr_bundle *attrs) { struct mlx5_ib_dm *mdm = to_mdm(dm); - u64 memic_addr; + struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev; + u64 start_addr = mdm->dev_addr + attr->offset; + int mode; - if (attr->access_flags & ~MLX5_IB_DM_ALLOWED_ACCESS) - return ERR_PTR(-EINVAL); + switch (mdm->type) { + case MLX5_IB_UAPI_DM_TYPE_MEMIC: + if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS) + return ERR_PTR(-EINVAL); + + mode = MLX5_MKC_ACCESS_MODE_MEMIC; + start_addr -= pci_resource_start(dev->pdev, 0); + break; + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS) + return ERR_PTR(-EINVAL); - memic_addr = mdm->dev_addr + attr->offset; + mode = MLX5_MKC_ACCESS_MODE_SW_ICM; + break; + default: + return ERR_PTR(-EINVAL); + } - return mlx5_ib_get_memic_mr(pd, memic_addr, attr->length, - attr->access_flags); + return mlx5_ib_get_dm_mr(pd, start_addr, attr->length, + attr->access_flags, mode); } struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, @@ -1622,15 +1637,14 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) kfree(mr); } -int mlx5_ib_dereg_mr(struct ib_mr *ibmr) +int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr)); return 0; } -struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg) +struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 0aa10ebda5d9..91507a2e9290 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -288,7 +288,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, ib_umem_odp_unmap_dma_pages(umem_odp, start, end); - if (unlikely(!umem->npages && mr->parent && + if (unlikely(!umem_odp->npages && mr->parent && !umem_odp->dying)) { WRITE_ONCE(umem_odp->dying, 1); atomic_inc(&mr->parent->num_leaf_free); @@ -711,6 +711,15 @@ struct pf_frame { int depth; }; +static bool mkey_is_eq(struct mlx5_core_mkey *mmkey, u32 key) +{ + if (!mmkey) + return false; + if (mmkey->type == MLX5_MKEY_MW) + return mlx5_base_mkey(mmkey->key) == mlx5_base_mkey(key); + return mmkey->key == key; +} + static int get_indirect_num_descs(struct mlx5_core_mkey *mmkey) { struct mlx5_ib_mw *mw; @@ -760,7 +769,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, next_mr: mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(key)); - if (!mmkey || mmkey->key != key) { + if (!mkey_is_eq(mmkey, key)) { mlx5_ib_dbg(dev, "failed to find mkey %x\n", key); ret = -EFAULT; goto srcu_unlock; @@ -920,7 +929,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault, void *wqe, void *wqe_end, u32 *bytes_mapped, - u32 *total_wqe_bytes, int receive_queue) + u32 *total_wqe_bytes, bool receive_queue) { int ret = 0, npages = 0; u64 io_virt; @@ -1200,17 +1209,15 @@ static inline struct mlx5_ib_srq *res_to_srq(struct mlx5_core_rsc_common *res) static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault) { - int ret; - void *wqe, *wqe_end; + bool sq = pfault->type & MLX5_PFAULT_REQUESTOR; + u16 wqe_index = pfault->wqe.wqe_index; + void *wqe = NULL, *wqe_end = NULL; u32 bytes_mapped, total_wqe_bytes; - char *buffer = NULL; + struct mlx5_core_rsc_common *res; int resume_with_error = 1; - u16 wqe_index = pfault->wqe.wqe_index; - int requestor = pfault->type & MLX5_PFAULT_REQUESTOR; - struct mlx5_core_rsc_common *res = NULL; - struct mlx5_ib_qp *qp = NULL; - struct mlx5_ib_srq *srq = NULL; + struct mlx5_ib_qp *qp; size_t bytes_copied; + int ret = 0; res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type); if (!res) { @@ -1218,87 +1225,74 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, return; } - switch (res->res) { - case MLX5_RES_QP: - qp = res_to_qp(res); - break; - case MLX5_RES_SRQ: - case MLX5_RES_XSRQ: - srq = res_to_srq(res); - break; - default: - mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", pfault->type); + if (res->res != MLX5_RES_QP && res->res != MLX5_RES_SRQ && + res->res != MLX5_RES_XSRQ) { + mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", + pfault->type); goto resolve_page_fault; } - buffer = (char *)__get_free_page(GFP_KERNEL); - if (!buffer) { + wqe = (void *)__get_free_page(GFP_KERNEL); + if (!wqe) { mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n"); goto resolve_page_fault; } - if (qp) { - if (requestor) { - ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index, - buffer, PAGE_SIZE, - &bytes_copied); - } else { - ret = mlx5_ib_read_user_wqe_rq(qp, wqe_index, - buffer, PAGE_SIZE, - &bytes_copied); - } - } else { - ret = mlx5_ib_read_user_wqe_srq(srq, wqe_index, - buffer, PAGE_SIZE, + qp = (res->res == MLX5_RES_QP) ? res_to_qp(res) : NULL; + if (qp && sq) { + ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index, wqe, PAGE_SIZE, + &bytes_copied); + if (ret) + goto read_user; + ret = mlx5_ib_mr_initiator_pfault_handler( + dev, pfault, qp, &wqe, &wqe_end, bytes_copied); + } else if (qp && !sq) { + ret = mlx5_ib_read_user_wqe_rq(qp, wqe_index, wqe, PAGE_SIZE, + &bytes_copied); + if (ret) + goto read_user; + ret = mlx5_ib_mr_responder_pfault_handler_rq( + dev, qp, wqe, &wqe_end, bytes_copied); + } else if (!qp) { + struct mlx5_ib_srq *srq = res_to_srq(res); + + ret = mlx5_ib_read_user_wqe_srq(srq, wqe_index, wqe, PAGE_SIZE, &bytes_copied); + if (ret) + goto read_user; + ret = mlx5_ib_mr_responder_pfault_handler_srq( + dev, srq, &wqe, &wqe_end, bytes_copied); } - if (ret) { - mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%d, wqe_index=%x, qpn=%x\n", - ret, wqe_index, pfault->token); + if (ret < 0 || wqe >= wqe_end) goto resolve_page_fault; - } - wqe = buffer; - if (requestor) - ret = mlx5_ib_mr_initiator_pfault_handler(dev, pfault, qp, - &wqe, &wqe_end, - bytes_copied); - else if (qp) - ret = mlx5_ib_mr_responder_pfault_handler_rq(dev, qp, - wqe, &wqe_end, - bytes_copied); - else - ret = mlx5_ib_mr_responder_pfault_handler_srq(dev, srq, - &wqe, &wqe_end, - bytes_copied); + ret = pagefault_data_segments(dev, pfault, wqe, wqe_end, &bytes_mapped, + &total_wqe_bytes, !sq); + if (ret == -EAGAIN) + goto out; - if (ret < 0) + if (ret < 0 || total_wqe_bytes > bytes_mapped) goto resolve_page_fault; - if (wqe >= wqe_end) { - mlx5_ib_err(dev, "ODP fault on invalid WQE.\n"); - goto resolve_page_fault; - } +out: + ret = 0; + resume_with_error = 0; - ret = pagefault_data_segments(dev, pfault, wqe, wqe_end, - &bytes_mapped, &total_wqe_bytes, - !requestor); - if (ret == -EAGAIN) { - resume_with_error = 0; - goto resolve_page_fault; - } else if (ret < 0 || total_wqe_bytes > bytes_mapped) { - goto resolve_page_fault; - } +read_user: + if (ret) + mlx5_ib_err( + dev, + "Failed reading a WQE following page fault, error %d, wqe_index %x, qpn %x\n", + ret, wqe_index, pfault->token); - resume_with_error = 0; resolve_page_fault: mlx5_ib_page_fault_resume(dev, pfault, resume_with_error); mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n", pfault->wqe.wq_num, resume_with_error, pfault->type); mlx5_core_res_put(res); - free_page((unsigned long)buffer); + free_page((unsigned long)wqe); } static int pages_in_range(u64 address, u32 length) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 581144e224e2..f6623c77443a 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -92,6 +92,7 @@ struct mlx5_modify_raw_qp_param { struct mlx5_rate_limit rl; u8 rq_q_ctr_id; + u16 port; }; static void get_cqs(enum ib_qp_type qp_type, @@ -777,14 +778,17 @@ err_umem: } static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, - struct mlx5_ib_rwq *rwq) + struct mlx5_ib_rwq *rwq, struct ib_udata *udata) { - struct mlx5_ib_ucontext *context; + struct mlx5_ib_ucontext *context = + rdma_udata_to_drv_context( + udata, + struct mlx5_ib_ucontext, + ibucontext); if (rwq->create_flags & MLX5_IB_WQ_FLAGS_DELAY_DROP) atomic_dec(&dev->delay_drop.rqs_cnt); - context = to_mucontext(pd->uobject->context); mlx5_ib_db_unmap_user(context, &rwq->db); if (rwq->umem) ib_umem_release(rwq->umem); @@ -983,11 +987,15 @@ err_bfreg: } static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd, - struct mlx5_ib_qp *qp, struct mlx5_ib_qp_base *base) + struct mlx5_ib_qp *qp, struct mlx5_ib_qp_base *base, + struct ib_udata *udata) { - struct mlx5_ib_ucontext *context; + struct mlx5_ib_ucontext *context = + rdma_udata_to_drv_context( + udata, + struct mlx5_ib_ucontext, + ibucontext); - context = to_mucontext(pd->uobject->context); mlx5_ib_db_unmap_user(context, &qp->db); if (base->ubuffer.umem) ib_umem_release(base->ubuffer.umem); @@ -1206,11 +1214,11 @@ static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev, mlx5_cmd_destroy_tis(dev->mdev, sq->tisn, to_mpd(pd)->uid); } -static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev, - struct mlx5_ib_sq *sq) +static void destroy_flow_rule_vport_sq(struct mlx5_ib_sq *sq) { if (sq->flow_rule) mlx5_del_flow_rules(sq->flow_rule); + sq->flow_rule = NULL; } static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, @@ -1278,15 +1286,8 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, if (err) goto err_umem; - err = create_flow_rule_vport_sq(dev, sq); - if (err) - goto err_flow; - return 0; -err_flow: - mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp); - err_umem: ib_umem_release(sq->ubuffer.umem); sq->ubuffer.umem = NULL; @@ -1297,7 +1298,7 @@ err_umem: static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq) { - destroy_flow_rule_vport_sq(dev, sq); + destroy_flow_rule_vport_sq(sq); mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp); ib_umem_release(sq->ubuffer.umem); } @@ -1402,7 +1403,8 @@ static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev, static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, u32 tdn, u32 *qp_flags_en, - struct ib_pd *pd) + struct ib_pd *pd, + u32 *out, int outlen) { u8 lb_flag = 0; u32 *in; @@ -1429,15 +1431,16 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST; - if (dev->rep) { + if (dev->is_rep) { lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; *qp_flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC; } MLX5_SET(tirc, tirc, self_lb_block, lb_flag); - err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn); + err = mlx5_core_create_tir_out(dev->mdev, in, inlen, out, outlen); + rq->tirn = MLX5_GET(create_tir_out, out, tirn); if (!err && MLX5_GET(tirc, tirc, self_lb_block)) { err = mlx5_ib_enable_lb(dev, false, true); @@ -1463,6 +1466,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, int err; u32 tdn = mucontext->tdn; u16 uid = to_mpd(pd)->uid; + u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {}; if (qp->sq.wqe_cnt) { err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd); @@ -1495,7 +1499,9 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (err) goto err_destroy_sq; - err = create_raw_packet_qp_tir(dev, rq, tdn, &qp->flags_en, pd); + err = create_raw_packet_qp_tir( + dev, rq, tdn, &qp->flags_en, pd, out, + MLX5_ST_SZ_BYTES(create_tir_out)); if (err) goto err_destroy_rq; @@ -1504,6 +1510,20 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN; resp->tirn = rq->tirn; resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) { + resp->tir_icm_addr = MLX5_GET( + create_tir_out, out, icm_address_31_0); + resp->tir_icm_addr |= + (u64)MLX5_GET(create_tir_out, out, + icm_address_39_32) + << 32; + resp->tir_icm_addr |= + (u64)MLX5_GET(create_tir_out, out, + icm_address_63_40) + << 40; + resp->comp_mask |= + MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR; + } } } @@ -1577,8 +1597,10 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, udata, struct mlx5_ib_ucontext, ibucontext); struct mlx5_ib_create_qp_resp resp = {}; int inlen; + int outlen; int err; u32 *in; + u32 *out; void *tirc; void *hfso; u32 selected_fields = 0; @@ -1641,7 +1663,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return -EOPNOTSUPP; } - if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC || dev->rep) { + if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC || dev->is_rep) { lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC; } @@ -1658,10 +1680,12 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, } inlen = MLX5_ST_SZ_BYTES(create_tir_in); - in = kvzalloc(inlen, GFP_KERNEL); + outlen = MLX5_ST_SZ_BYTES(create_tir_out); + in = kvzalloc(inlen + outlen, GFP_KERNEL); if (!in) return -ENOMEM; + out = in + MLX5_ST_SZ_DW(create_tir_in); MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid); tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); MLX5_SET(tirc, tirc, disp_type, @@ -1773,8 +1797,9 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields); create_tir: - err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn); + err = mlx5_core_create_tir_out(dev->mdev, in, inlen, out, outlen); + qp->rss_qp.tirn = MLX5_GET(create_tir_out, out, tirn); if (!err && MLX5_GET(tirc, tirc, self_lb_block)) { err = mlx5_ib_enable_lb(dev, false, true); @@ -1789,6 +1814,18 @@ create_tir: if (mucontext->devx_uid) { resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; resp.tirn = qp->rss_qp.tirn; + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) { + resp.tir_icm_addr = + MLX5_GET(create_tir_out, out, icm_address_31_0); + resp.tir_icm_addr |= (u64)MLX5_GET(create_tir_out, out, + icm_address_39_32) + << 32; + resp.tir_icm_addr |= (u64)MLX5_GET(create_tir_out, out, + icm_address_63_40) + << 40; + resp.comp_mask |= + MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR; + } } err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); @@ -2287,7 +2324,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, err_create: if (qp->create_type == MLX5_QP_USER) - destroy_qp_user(dev, pd, qp, base); + destroy_qp_user(dev, pd, qp, base, udata); else if (qp->create_type == MLX5_QP_KERNEL) destroy_qp_kernel(dev, qp); @@ -2398,7 +2435,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, const struct mlx5_modify_raw_qp_param *raw_qp_param, u8 lag_tx_affinity); -static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) +static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + struct ib_udata *udata) { struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_ib_qp_base *base; @@ -2469,7 +2507,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) if (qp->create_type == MLX5_QP_KERNEL) destroy_qp_kernel(dev, qp); else if (qp->create_type == MLX5_QP_USER) - destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base); + destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base, udata); } static const char *ib_qp_type_str(enum ib_qp_type type) @@ -2735,7 +2773,7 @@ static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp) return 0; } -int mlx5_ib_destroy_qp(struct ib_qp *qp) +int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_qp *mqp = to_mqp(qp); @@ -2746,7 +2784,7 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp) if (mqp->qp_sub_type == MLX5_IB_QPT_DCT) return mlx5_ib_destroy_dct(mqp); - destroy_qp_common(dev, mqp); + destroy_qp_common(dev, mqp, udata); kfree(mqp); @@ -2964,6 +3002,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX | MLX5_QP_OPTPAR_Q_KEY | MLX5_QP_OPTPAR_PRI_PORT, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_PRI_PORT, }, [MLX5_QP_STATE_RTR] = { [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | @@ -2997,6 +3040,12 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PM_STATE, [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PM_STATE | + MLX5_QP_OPTPAR_RNR_TIMEOUT, }, }, [MLX5_QP_STATE_RTS] = { @@ -3013,6 +3062,12 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY | MLX5_QP_OPTPAR_SRQN | MLX5_QP_OPTPAR_CQN_RCV, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_RNR_TIMEOUT | + MLX5_QP_OPTPAR_PM_STATE | + MLX5_QP_OPTPAR_ALT_ADDR_PATH, }, }, [MLX5_QP_STATE_SQER] = { @@ -3024,6 +3079,10 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RRE, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RNR_TIMEOUT | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RRE, }, }, }; @@ -3264,6 +3323,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, } if (modify_sq) { + struct mlx5_flow_handle *flow_rule; + if (tx_affinity) { err = modify_raw_packet_tx_affinity(dev->mdev, sq, tx_affinity, @@ -3272,8 +3333,25 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return err; } - return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, - raw_qp_param, qp->ibqp.pd); + flow_rule = create_flow_rule_vport_sq(dev, sq, + raw_qp_param->port); + if (IS_ERR(flow_rule)) + return PTR_ERR(flow_rule); + + err = modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, + raw_qp_param, qp->ibqp.pd); + if (err) { + if (flow_rule) + mlx5_del_flow_rules(flow_rule); + return err; + } + + if (flow_rule) { + destroy_flow_rule_vport_sq(sq); + sq->flow_rule = flow_rule; + } + + return err; } return 0; @@ -3298,7 +3376,7 @@ static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev, } else { tx_port_affinity = (unsigned int)atomic_add_return( - 1, &dev->roce[port_num].tx_port_affinity) % + 1, &dev->port[port_num].roce.tx_port_affinity) % MLX5_MAX_PORTS + 1; mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n", @@ -3403,7 +3481,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, (ibqp->qp_type == IB_QPT_XRC_INI) || (ibqp->qp_type == IB_QPT_XRC_TGT)) { if (dev->lag_active) { - u8 p = mlx5_core_native_port_num(dev->mdev); + u8 p = mlx5_core_native_port_num(dev->mdev) - 1; tx_affinity = get_tx_affinity(dev, pd, base, p, udata); context->flags |= cpu_to_be32(tx_affinity << 24); @@ -3556,6 +3634,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID; } + if (attr_mask & IB_QP_PORT) + raw_qp_param.port = attr->port_num; + if (attr_mask & IB_QP_RATE_LIMIT) { raw_qp_param.rl.rate = attr->rate_limit; @@ -4729,16 +4810,15 @@ static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size, static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16) { __be32 *p = NULL; - u32 tidx = idx; int i, j; pr_debug("dump WQE index %u:\n", idx); for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) { if ((i & 0xf) == 0) { - tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1); - p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, tidx); + p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx); pr_debug("WQBB at %p:\n", (void *)p); j = 0; + idx = (idx + 1) & (qp->sq.wqe_cnt - 1); } pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]), be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]), @@ -5627,8 +5707,7 @@ out: } struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) + struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_xrcd *xrcd; @@ -5650,7 +5729,7 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, return &xrcd->ibxrcd; } -int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd) +int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(xrcd->device); u32 xrcdn = to_mxrcd(xrcd)->xrcdn; @@ -5962,19 +6041,19 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, err_copy: mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); err_user_rq: - destroy_user_rq(dev, pd, rwq); + destroy_user_rq(dev, pd, rwq, udata); err: kfree(rwq); return ERR_PTR(err); } -int mlx5_ib_destroy_wq(struct ib_wq *wq) +int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(wq->device); struct mlx5_ib_rwq *rwq = to_mrwq(wq); mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); - destroy_user_rq(dev, wq->pd, rwq); + destroy_user_rq(dev, wq->pd, rwq, udata); kfree(rwq); return 0; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 1ec1beb1296b..4e7fde86c96b 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -194,9 +194,15 @@ err_db: return err; } -static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq) +static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, + struct ib_udata *udata) { - mlx5_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db); + mlx5_ib_db_unmap_user( + rdma_udata_to_drv_context( + udata, + struct mlx5_ib_ucontext, + ibucontext), + &srq->db); ib_umem_release(srq->umem); } @@ -208,16 +214,16 @@ static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq) mlx5_db_free(dev->mdev, &srq->db); } -struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +int mlx5_ib_create_srq(struct ib_srq *ib_srq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_ib_srq *srq; + struct mlx5_ib_dev *dev = to_mdev(ib_srq->device); + struct mlx5_ib_srq *srq = to_msrq(ib_srq); size_t desc_size; size_t buf_size; int err; - struct mlx5_srq_attr in = {0}; + struct mlx5_srq_attr in = {}; __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); /* Sanity check SRQ size before proceeding */ @@ -225,13 +231,9 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", init_attr->attr.max_wr, max_srq_wqes); - return ERR_PTR(-EINVAL); + return -EINVAL; } - srq = kmalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) - return ERR_PTR(-ENOMEM); - mutex_init(&srq->mutex); spin_lock_init(&srq->lock); srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1); @@ -239,35 +241,32 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, desc_size = sizeof(struct mlx5_wqe_srq_next_seg) + srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg); - if (desc_size == 0 || srq->msrq.max_gs > desc_size) { - err = -EINVAL; - goto err_srq; - } + if (desc_size == 0 || srq->msrq.max_gs > desc_size) + return -EINVAL; + desc_size = roundup_pow_of_two(desc_size); desc_size = max_t(size_t, 32, desc_size); - if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg)) { - err = -EINVAL; - goto err_srq; - } + if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg)) + return -EINVAL; + srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) / sizeof(struct mlx5_wqe_data_seg); srq->msrq.wqe_shift = ilog2(desc_size); buf_size = srq->msrq.max * desc_size; - if (buf_size < desc_size) { - err = -EINVAL; - goto err_srq; - } + if (buf_size < desc_size) + return -EINVAL; + in.type = init_attr->srq_type; if (udata) - err = create_srq_user(pd, srq, &in, udata, buf_size); + err = create_srq_user(ib_srq->pd, srq, &in, udata, buf_size); else err = create_srq_kernel(dev, srq, &in, buf_size); if (err) { mlx5_ib_warn(dev, "create srq %s failed, err %d\n", udata ? "user" : "kernel", err); - goto err_srq; + return err; } in.log_size = ilog2(srq->msrq.max); @@ -297,7 +296,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, else in.cqn = to_mcq(dev->devr.c0)->mcq.cqn; - in.pd = to_mpd(pd)->pdn; + in.pd = to_mpd(ib_srq->pd)->pdn; in.db_record = srq->db.dma; err = mlx5_cmd_create_srq(dev, &srq->msrq, &in); kvfree(in.pas); @@ -320,21 +319,18 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, init_attr->attr.max_wr = srq->msrq.max - 1; - return &srq->ibsrq; + return 0; err_core: mlx5_cmd_destroy_srq(dev, &srq->msrq); err_usr_kern_srq: if (udata) - destroy_srq_user(pd, srq); + destroy_srq_user(ib_srq->pd, srq, udata); else destroy_srq_kernel(dev, srq); -err_srq: - kfree(srq); - - return ERR_PTR(err); + return err; } int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, @@ -387,7 +383,7 @@ out_box: return ret; } -int mlx5_ib_destroy_srq(struct ib_srq *srq) +void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(srq->device); struct mlx5_ib_srq *msrq = to_msrq(srq); @@ -395,14 +391,16 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq) mlx5_cmd_destroy_srq(dev, &msrq->msrq); if (srq->uobject) { - mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); + mlx5_ib_db_unmap_user( + rdma_udata_to_drv_context( + udata, + struct mlx5_ib_ucontext, + ibucontext), + &msrq->db); ib_umem_release(msrq->umem); } else { destroy_srq_kernel(dev, msrq); } - - kfree(srq); - return 0; } void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index) diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h index c330af35ff10..af197c36d757 100644 --- a/drivers/infiniband/hw/mlx5/srq.h +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -51,15 +51,12 @@ struct mlx5_core_srq { struct mlx5_srq_table { struct notifier_block nb; - /* protect radix tree - */ - spinlock_t lock; - struct radix_tree_root tree; + struct xarray array; }; int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in); -int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq); +void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq); int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *out); int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index 63ac38bb3498..b0d0687c7a68 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -83,13 +83,11 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) struct mlx5_srq_table *table = &dev->srq_table; struct mlx5_core_srq *srq; - spin_lock(&table->lock); - - srq = radix_tree_lookup(&table->tree, srqn); + xa_lock(&table->array); + srq = xa_load(&table->array, srqn); if (srq) atomic_inc(&srq->common.refcount); - - spin_unlock(&table->lock); + xa_unlock(&table->array); return srq; } @@ -597,9 +595,7 @@ int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, atomic_set(&srq->common.refcount, 1); init_completion(&srq->common.free); - spin_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, srq->srqn, srq); - spin_unlock_irq(&table->lock); + err = xa_err(xa_store_irq(&table->array, srq->srqn, srq, GFP_KERNEL)); if (err) goto err_destroy_srq_split; @@ -611,26 +607,22 @@ err_destroy_srq_split: return err; } -int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { struct mlx5_srq_table *table = &dev->srq_table; struct mlx5_core_srq *tmp; int err; - spin_lock_irq(&table->lock); - tmp = radix_tree_delete(&table->tree, srq->srqn); - spin_unlock_irq(&table->lock); + tmp = xa_erase_irq(&table->array, srq->srqn); if (!tmp || tmp != srq) - return -EINVAL; + return; err = destroy_srq_split(dev, srq); if (err) - return err; + return; mlx5_core_res_put(&srq->common); wait_for_completion(&srq->common.free); - - return 0; } int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, @@ -680,13 +672,11 @@ static int srq_event_notifier(struct notifier_block *nb, eqe = data; srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; - spin_lock(&table->lock); - - srq = radix_tree_lookup(&table->tree, srqn); + xa_lock(&table->array); + srq = xa_load(&table->array, srqn); if (srq) atomic_inc(&srq->common.refcount); - - spin_unlock(&table->lock); + xa_unlock(&table->array); if (!srq) return NOTIFY_OK; @@ -703,8 +693,7 @@ int mlx5_init_srq_table(struct mlx5_ib_dev *dev) struct mlx5_srq_table *table = &dev->srq_table; memset(table, 0, sizeof(*table)); - spin_lock_init(&table->lock); - INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + xa_init_flags(&table->array, XA_FLAGS_LOCK_IRQ); table->nb.notifier_call = srq_event_notifier; mlx5_notifier_register(dev->mdev, &table->nb); |