author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-19 09:51:33 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-19 09:51:33 -0700
commit | 3d51520954154a476bfdacf9427acd1d9538734c (patch)
tree | 8b28ef10d924613bf41c51a10a5861eead4af0fe /drivers/infiniband/hw/mlx5
parent | ef7c8f2b1fb46d3fc7a46d64bb73919e288ba547 (diff)
parent | 887cd308fd46a1c6956e9ccda1aaca830edc8ed7 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"Usual collection of small improvements and fixes:
- Bug fixes and minor improvements in efa, irdma, mlx4, mlx5, rxe,
hfi1, qib, ocrdma
- bnxt_re support for MSN, which is a new retransmit logic
- Initial mana support for RC qps
- Use after free bug and cleanups in iwcm
- Reduce resource usage in mlx5 when RDMA verbs features are not used
- New verb to drain shared receive queues, similar to normal receive
queues. This is necessary to allow ULPs a clean shutdown. Used in
the iscsi rdma target (see the sketch after this list)
- mlx5 support for more than 16 bits of doorbell indexes
- Doorbell moderation support for bnxt_re
- IB multi-plane support for mlx5
- New EFA adaptor PCI IDs
- RDMA_NAME_ASSIGN_TYPE_USER to hint to userspace that it shouldn't
rename the device
- A collection of hns bugs
- Fix a long-standing bug in bnxt_re with incorrect endian handling of
immediate data"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (65 commits)
IB/hfi1: Constify struct flag_table
RDMA/mana_ib: Set correct device into ib
bnxt_re: Fix imm_data endianness
RDMA: Fix netdev tracker in ib_device_set_netdev
RDMA/hns: Fix mbx timing out before CMD execution is completed
RDMA/hns: Fix insufficient extend DB for VFs.
RDMA/hns: Fix undifined behavior caused by invalid max_sge
RDMA/hns: Fix shift-out-bounds when max_inline_data is 0
RDMA/hns: Fix missing pagesize and alignment check in FRMR
RDMA/hns: Fix unmatch exception handling when init eq table fails
RDMA/hns: Fix soft lockup under heavy CEQE load
RDMA/hns: Check atomic wr length
RDMA/ocrdma: Don't inline statistics functions
RDMA/core: Introduce "name_assign_type" for an IB device
RDMA/qib: Fix truncation compilation warnings in qib_verbs.c
RDMA/qib: Fix truncation compilation warnings in qib_init.c
RDMA/efa: Add EFA 0xefa3 PCI ID
RDMA/mlx5: Support per-plane port IB counters by querying PPCNT register
net/mlx5: mlx5_ifc update for accessing ppcnt register of plane ports
RDMA/mlx5: Add plane index support when querying PTYS registers
...
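The "name_assign_type" entry above ties into the mlx5 change in this diff: when an IB device is registered under a user-supplied name (here the sub-device name), the driver calls ib_mark_name_assigned_by_user() before registration so the device reports RDMA_NAME_ASSIGN_TYPE_USER and userspace tooling knows not to rename it. A minimal sketch of that pattern, paraphrased from the mlx5_ib_stage_ib_reg_init() hunk below with error handling omitted, follows.

#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"	/* driver-internal header, for struct mlx5_ib_dev */

/* Sketch of the RDMA_NAME_ASSIGN_TYPE_USER pattern from this merge. */
static int example_ib_reg(struct mlx5_ib_dev *dev)
{
	const char *name;

	if (dev->sub_dev_name) {
		/* User-chosen name: mark it so userspace does not rename it. */
		name = dev->sub_dev_name;
		ib_mark_name_assigned_by_user(&dev->ib_dev);
	} else {
		/* Kernel-chosen template name. */
		name = "mlx5_%d";
	}

	return ib_register_device(&dev->ib_dev, name, &dev->mdev->pdev->dev);
}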
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r-- | drivers/infiniband/hw/mlx5/cmd.c     |  12
-rw-r--r-- | drivers/infiniband/hw/mlx5/cmd.h     |   2
-rw-r--r-- | drivers/infiniband/hw/mlx5/cq.c      |  31
-rw-r--r-- | drivers/infiniband/hw/mlx5/mad.c     |  71
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c    | 336
-rw-r--r-- | drivers/infiniband/hw/mlx5/mlx5_ib.h |  32
-rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c      |   9
-rw-r--r-- | drivers/infiniband/hw/mlx5/odp.c     |   6
-rw-r--r-- | drivers/infiniband/hw/mlx5/qp.c      |  11
-rw-r--r-- | drivers/infiniband/hw/mlx5/qpc.c     |  13
-rw-r--r-- | drivers/infiniband/hw/mlx5/srq.c     |   4
-rw-r--r-- | drivers/infiniband/hw/mlx5/umr.c     |  55
-rw-r--r-- | drivers/infiniband/hw/mlx5/umr.h     |   3
13 files changed, 484 insertions, 101 deletions
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 1d0c8d5e745b..895b62cc528d 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -177,7 +177,7 @@ int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid) return mlx5_cmd_exec_in(dev, dealloc_xrcd, in); } -int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, +int mlx5_cmd_mad_ifc(struct mlx5_ib_dev *dev, const void *inb, void *outb, u16 opmod, u8 port) { int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out); @@ -195,12 +195,18 @@ int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC); MLX5_SET(mad_ifc_in, in, op_mod, opmod); - MLX5_SET(mad_ifc_in, in, port, port); + if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) { + MLX5_SET(mad_ifc_in, in, plane_index, port); + MLX5_SET(mad_ifc_in, in, port, + smi_to_native_portnum(dev, port)); + } else { + MLX5_SET(mad_ifc_in, in, port, port); + } data = MLX5_ADDR_OF(mad_ifc_in, in, mad); memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); - err = mlx5_cmd_exec_inout(dev, mad_ifc, in, out); + err = mlx5_cmd_exec_inout(dev->mdev, mad_ifc, in, out); if (err) goto out; diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 93a971a40d11..e5cd31270443 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -54,7 +54,7 @@ int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn, u16 uid); int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid); int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid); -int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, +int mlx5_cmd_mad_ifc(struct mlx5_ib_dev *dev, const void *inb, void *outb, u16 opmod, u8 port); int mlx5_cmd_uar_alloc(struct mlx5_core_dev *dev, u32 *uarn, u16 uid); int mlx5_cmd_uar_dealloc(struct mlx5_core_dev *dev, u32 uarn, u16 uid); diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 9773d2a3d97f..4c54dc578069 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -38,6 +38,9 @@ #include "srq.h" #include "qp.h" +#define UVERBS_MODULE_NAME mlx5_ib +#include <rdma/uverbs_named_ioctl.h> + static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe) { struct ib_cq *ibcq = &to_mibcq(cq)->ibcq; @@ -714,7 +717,8 @@ static int mini_cqe_res_format_to_hw(struct mlx5_ib_dev *dev, u8 format) static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, struct mlx5_ib_cq *cq, int entries, u32 **cqb, - int *cqe_size, int *index, int *inlen) + int *cqe_size, int *index, int *inlen, + struct uverbs_attr_bundle *attrs) { struct mlx5_ib_create_cq ucmd = {}; unsigned long page_size; @@ -788,7 +792,11 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET(cqc, cqc, page_offset, page_offset_quantized); - if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) { + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX)) { + err = uverbs_copy_from(index, attrs, MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX); + if (err) + goto err_cqb; + } else if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) { *index = ucmd.uar_page_index; } else if (context->bfregi.lib_uar_dyn) { err = -EINVAL; @@ -942,8 +950,9 @@ static void notify_soft_wc_handler(struct work_struct *work) } int 
mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, - struct ib_udata *udata) + struct uverbs_attr_bundle *attrs) { + struct ib_udata *udata = &attrs->driver_udata; struct ib_device *ibdev = ibcq->device; int entries = attr->cqe; int vector = attr->comp_vector; @@ -980,7 +989,7 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, if (udata) { err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size, - &index, &inlen); + &index, &inlen, attrs); if (err) return err; } else { @@ -1442,3 +1451,17 @@ int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc) return 0; } + +ADD_UVERBS_ATTRIBUTES_SIMPLE( + mlx5_ib_cq_create, + UVERBS_OBJECT_CQ, + UVERBS_METHOD_CQ_CREATE, + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX, + UVERBS_ATTR_TYPE(u32), + UA_OPTIONAL)); + +const struct uapi_definition mlx5_ib_create_cq_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_CQ, &mlx5_ib_cq_create), + {}, +}; diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 3e43687a7f6f..1b6c5e37d169 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -69,7 +69,7 @@ static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, if (ignore_bkey || !in_wc) op_modifier |= 0x2; - return mlx5_cmd_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, + return mlx5_cmd_mad_ifc(dev, in_mad, response_mad, op_modifier, port); } @@ -147,8 +147,39 @@ static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt, vl_15_dropped); } -static int query_ib_ppcnt(struct mlx5_core_dev *dev, u8 port_num, void *out, - size_t sz) +static void pma_cnt_ext_assign_ppcnt(struct ib_pma_portcounters_ext *cnt_ext, + void *out) +{ + void *out_pma = MLX5_ADDR_OF(ppcnt_reg, out, + counter_set); + +#define MLX5_GET_EXT_CNTR(counter_name) \ + MLX5_GET64(ib_ext_port_cntrs_grp_data_layout, \ + out_pma, counter_name##_high) + + cnt_ext->port_xmit_data = + cpu_to_be64(MLX5_GET_EXT_CNTR(port_xmit_data) >> 2); + cnt_ext->port_rcv_data = + cpu_to_be64(MLX5_GET_EXT_CNTR(port_rcv_data) >> 2); + + cnt_ext->port_xmit_packets = + cpu_to_be64(MLX5_GET_EXT_CNTR(port_xmit_pkts)); + cnt_ext->port_rcv_packets = + cpu_to_be64(MLX5_GET_EXT_CNTR(port_rcv_pkts)); + + cnt_ext->port_unicast_xmit_packets = + cpu_to_be64(MLX5_GET_EXT_CNTR(port_unicast_xmit_pkts)); + cnt_ext->port_unicast_rcv_packets = + cpu_to_be64(MLX5_GET_EXT_CNTR(port_unicast_rcv_pkts)); + + cnt_ext->port_multicast_xmit_packets = + cpu_to_be64(MLX5_GET_EXT_CNTR(port_multicast_xmit_pkts)); + cnt_ext->port_multicast_rcv_packets = + cpu_to_be64(MLX5_GET_EXT_CNTR(port_multicast_rcv_pkts)); +} + +static int query_ib_ppcnt(struct mlx5_core_dev *dev, u8 port_num, u8 plane_num, + void *out, size_t sz, bool ext) { u32 *in; int err; @@ -160,8 +191,14 @@ static int query_ib_ppcnt(struct mlx5_core_dev *dev, u8 port_num, void *out, } MLX5_SET(ppcnt_reg, in, local_port, port_num); - - MLX5_SET(ppcnt_reg, in, grp, MLX5_INFINIBAND_PORT_COUNTERS_GROUP); + MLX5_SET(ppcnt_reg, in, plane_ind, plane_num); + + if (ext) + MLX5_SET(ppcnt_reg, in, grp, + MLX5_INFINIBAND_EXTENDED_PORT_COUNTERS_GROUP); + else + MLX5_SET(ppcnt_reg, in, grp, + MLX5_INFINIBAND_PORT_COUNTERS_GROUP); err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); @@ -189,7 +226,8 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num, mdev_port_num = 1; } if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1 && - !mlx5_core_mp_enabled(mdev)) { + !mlx5_core_mp_enabled(mdev) && + dev->ib_dev.type != 
RDMA_DEVICE_TYPE_SMI) { /* set local port to one for Function-Per-Port HCA. */ mdev = dev->mdev; mdev_port_num = 1; @@ -208,7 +246,8 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num, if (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT) { struct ib_pma_portcounters_ext *pma_cnt_ext = (struct ib_pma_portcounters_ext *)(out_mad->data + 40); - int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out); + int sz = max(MLX5_ST_SZ_BYTES(query_vport_counter_out), + MLX5_ST_SZ_BYTES(ppcnt_reg)); out_cnt = kvzalloc(sz, GFP_KERNEL); if (!out_cnt) { @@ -216,10 +255,18 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num, goto done; } - err = mlx5_core_query_vport_counter(mdev, 0, 0, mdev_port_num, - out_cnt); - if (!err) - pma_cnt_ext_assign(pma_cnt_ext, out_cnt); + if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) { + err = query_ib_ppcnt(mdev, mdev_port_num, + port_num, out_cnt, sz, 1); + if (!err) + pma_cnt_ext_assign_ppcnt(pma_cnt_ext, out_cnt); + } else { + err = mlx5_core_query_vport_counter(mdev, 0, 0, + mdev_port_num, + out_cnt); + if (!err) + pma_cnt_ext_assign(pma_cnt_ext, out_cnt); + } } else { struct ib_pma_portcounters *pma_cnt = (struct ib_pma_portcounters *)(out_mad->data + 40); @@ -231,7 +278,7 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num, goto done; } - err = query_ib_ppcnt(mdev, mdev_port_num, out_cnt, sz); + err = query_ib_ppcnt(mdev, mdev_port_num, 0, out_cnt, sz, 0); if (!err) pma_cnt_assign(pma_cnt, out_cnt); } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 086de6a022f9..6048b9ad13bb 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -282,6 +282,14 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev, struct mlx5_ib_multiport_info *mpi; struct mlx5_ib_port *port; + if (ibdev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) { + if (native_port_num) + *native_port_num = smi_to_native_portnum(ibdev, + ib_port_num); + return ibdev->mdev; + + } + if (!mlx5_core_mp_enabled(ibdev->mdev) || ll != IB_LINK_LAYER_ETHERNET) { if (native_port_num) @@ -503,10 +511,10 @@ static int mlx5_query_port_roce(struct ib_device *device, u32 port_num, */ if (dev->is_rep) err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, - 1); + 1, 0); else err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, - mdev_port_num); + mdev_port_num, 0); if (err) goto out; ext = !!MLX5_GET_ETH_PROTO(ptys_reg, out, true, eth_proto_capability); @@ -1333,11 +1341,11 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u32 port, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_hca_vport_context *rep; + u8 vl_hw_cap, plane_index = 0; u16 max_mtu; u16 oper_mtu; int err; u16 ib_link_width_oper; - u8 vl_hw_cap; rep = kzalloc(sizeof(*rep), GFP_KERNEL); if (!rep) { @@ -1347,6 +1355,11 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u32 port, /* props being zeroed by the caller, avoid zeroing it here */ + if (ibdev->type == RDMA_DEVICE_TYPE_SMI) { + plane_index = port; + port = smi_to_native_portnum(dev, port); + } + err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep); if (err) goto out; @@ -1357,7 +1370,14 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u32 port, props->sm_sl = rep->sm_sl; props->state = rep->vport_state; props->phys_state = rep->port_physical_state; - props->port_cap_flags = rep->cap_mask1; + + props->port_cap_flags = rep->cap_mask1; + if (dev->num_plane) { + 
props->port_cap_flags |= IB_PORT_SM_DISABLED; + props->port_cap_flags &= ~IB_PORT_SM; + } else if (ibdev->type == RDMA_DEVICE_TYPE_SMI) + props->port_cap_flags &= ~IB_PORT_CM_SUP; + props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size)); props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg); props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size)); @@ -1370,7 +1390,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u32 port, props->port_cap_flags2 = rep->cap_mask2; err = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper, - &props->active_speed, port); + &props->active_speed, port, plane_index); if (err) goto out; @@ -2776,6 +2796,23 @@ static int mlx5_ib_event_slave_port(struct notifier_block *nb, return NOTIFY_OK; } +static int mlx5_ib_get_plane_num(struct mlx5_core_dev *mdev, u8 *num_plane) +{ + struct mlx5_hca_vport_context vport_ctx; + int err; + + *num_plane = 0; + if (!MLX5_CAP_GEN(mdev, ib_virt)) + return 0; + + err = mlx5_query_hca_vport_context(mdev, 0, 1, 0, &vport_ctx); + if (err) + return err; + + *num_plane = vport_ctx.num_plane; + return 0; +} + static int set_has_smi_cap(struct mlx5_ib_dev *dev) { struct mlx5_hca_vport_context vport_ctx; @@ -2786,10 +2823,15 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) return 0; for (port = 1; port <= dev->num_ports; port++) { - if (!MLX5_CAP_GEN(dev->mdev, ib_virt)) { + if (dev->num_plane) { + dev->port_caps[port - 1].has_smi = false; + continue; + } else if (!MLX5_CAP_GEN(dev->mdev, ib_virt) || + dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) { dev->port_caps[port - 1].has_smi = true; continue; } + err = mlx5_query_hca_vport_context(dev->mdev, 0, port, 0, &vport_ctx); if (err) { @@ -2823,37 +2865,72 @@ static u8 mlx5_get_umr_fence(u8 umr_fence_cap) } } -static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev) +int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev) { struct mlx5_ib_resources *devr = &dev->devr; - struct ib_srq_init_attr attr; - struct ib_device *ibdev; struct ib_cq_init_attr cq_attr = {.cqe = 1}; - int port; + struct ib_device *ibdev; + struct ib_pd *pd; + struct ib_cq *cq; int ret = 0; - ibdev = &dev->ib_dev; - if (!MLX5_CAP_GEN(dev->mdev, xrc)) - return -EOPNOTSUPP; + /* + * devr->c0 is set once, never changed until device unload. + * Avoid taking the mutex if initialization is already done. + */ + if (devr->c0) + return 0; - devr->p0 = ib_alloc_pd(ibdev, 0); - if (IS_ERR(devr->p0)) - return PTR_ERR(devr->p0); + mutex_lock(&devr->cq_lock); + if (devr->c0) + goto unlock; - devr->c0 = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr); - if (IS_ERR(devr->c0)) { - ret = PTR_ERR(devr->c0); - goto error1; + ibdev = &dev->ib_dev; + pd = ib_alloc_pd(ibdev, 0); + if (IS_ERR(pd)) { + ret = PTR_ERR(pd); + mlx5_ib_err(dev, "Couldn't allocate PD for res init, err=%d\n", ret); + goto unlock; } - ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0); - if (ret) - goto error2; + cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr); + if (IS_ERR(cq)) { + ret = PTR_ERR(cq); + mlx5_ib_err(dev, "Couldn't create CQ for res init, err=%d\n", ret); + ib_dealloc_pd(pd); + goto unlock; + } - ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0); + devr->p0 = pd; + devr->c0 = cq; + +unlock: + mutex_unlock(&devr->cq_lock); + return ret; +} + +int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev) +{ + struct mlx5_ib_resources *devr = &dev->devr; + struct ib_srq_init_attr attr; + struct ib_srq *s0, *s1; + int ret = 0; + + /* + * devr->s1 is set once, never changed until device unload. 
+ * Avoid taking the mutex if initialization is already done. + */ + if (devr->s1) + return 0; + + mutex_lock(&devr->srq_lock); + if (devr->s1) + goto unlock; + + ret = mlx5_ib_dev_res_cq_init(dev); if (ret) - goto error3; + goto unlock; memset(&attr, 0, sizeof(attr)); attr.attr.max_sge = 1; @@ -2861,10 +2938,11 @@ static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev) attr.srq_type = IB_SRQT_XRC; attr.ext.cq = devr->c0; - devr->s0 = ib_create_srq(devr->p0, &attr); - if (IS_ERR(devr->s0)) { - ret = PTR_ERR(devr->s0); - goto err_create; + s0 = ib_create_srq(devr->p0, &attr); + if (IS_ERR(s0)) { + ret = PTR_ERR(s0); + mlx5_ib_err(dev, "Couldn't create SRQ 0 for res init, err=%d\n", ret); + goto unlock; } memset(&attr, 0, sizeof(attr)); @@ -2872,29 +2950,48 @@ static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev) attr.attr.max_wr = 1; attr.srq_type = IB_SRQT_BASIC; - devr->s1 = ib_create_srq(devr->p0, &attr); - if (IS_ERR(devr->s1)) { - ret = PTR_ERR(devr->s1); - goto error6; + s1 = ib_create_srq(devr->p0, &attr); + if (IS_ERR(s1)) { + ret = PTR_ERR(s1); + mlx5_ib_err(dev, "Couldn't create SRQ 1 for res init, err=%d\n", ret); + ib_destroy_srq(s0); + } + + devr->s0 = s0; + devr->s1 = s1; + +unlock: + mutex_unlock(&devr->srq_lock); + return ret; +} + +static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev) +{ + struct mlx5_ib_resources *devr = &dev->devr; + int port; + int ret; + + if (!MLX5_CAP_GEN(dev->mdev, xrc)) + return -EOPNOTSUPP; + + ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0); + if (ret) + return ret; + + ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0); + if (ret) { + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0); + return ret; } for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) INIT_WORK(&devr->ports[port].pkey_change_work, pkey_change_handler); - return 0; + mutex_init(&devr->cq_lock); + mutex_init(&devr->srq_lock); -error6: - ib_destroy_srq(devr->s0); -err_create: - mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0); -error3: - mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0); -error2: - ib_destroy_cq(devr->c0); -error1: - ib_dealloc_pd(devr->p0); - return ret; + return 0; } static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev) @@ -2911,12 +3008,20 @@ static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev) for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) cancel_work_sync(&devr->ports[port].pkey_change_work); - ib_destroy_srq(devr->s1); - ib_destroy_srq(devr->s0); + /* After s0/s1 init, they are not unset during the device lifetime. */ + if (devr->s1) { + ib_destroy_srq(devr->s1); + ib_destroy_srq(devr->s0); + } mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0); mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0); - ib_destroy_cq(devr->c0); - ib_dealloc_pd(devr->p0); + /* After p0/c0 init, they are not unset during the device lifetime. 
*/ + if (devr->c0) { + ib_destroy_cq(devr->c0); + ib_dealloc_pd(devr->p0); + } + mutex_destroy(&devr->cq_lock); + mutex_destroy(&devr->srq_lock); } static u32 get_core_cap_flags(struct ib_device *ibdev, @@ -2932,6 +3037,13 @@ static u32 get_core_cap_flags(struct ib_device *ibdev, if (rep->grh_required) ret |= RDMA_CORE_CAP_IB_GRH_REQUIRED; + if (dev->num_plane) + return ret | RDMA_CORE_CAP_PROT_IB | RDMA_CORE_CAP_IB_MAD | + RDMA_CORE_CAP_IB_CM | RDMA_CORE_CAP_IB_SA | + RDMA_CORE_CAP_AF_IB; + else if (ibdev->type == RDMA_DEVICE_TYPE_SMI) + return ret | RDMA_CORE_CAP_IB_MAD | RDMA_CORE_CAP_IB_SMI; + if (ll == IB_LINK_LAYER_INFINIBAND) return ret | RDMA_CORE_PORT_IBA_IB; @@ -2967,6 +3079,9 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u32 port_num, return err; if (ll == IB_LINK_LAYER_INFINIBAND) { + if (ibdev->type == RDMA_DEVICE_TYPE_SMI) + port_num = smi_to_native_portnum(dev, port_num); + err = mlx5_query_hca_vport_context(dev->mdev, 0, port_num, 0, &rep); if (err) @@ -3687,6 +3802,7 @@ static const struct uapi_definition mlx5_ib_defs[] = { UAPI_DEF_CHAIN(mlx5_ib_qos_defs), UAPI_DEF_CHAIN(mlx5_ib_std_types_defs), UAPI_DEF_CHAIN(mlx5_ib_dm_defs), + UAPI_DEF_CHAIN(mlx5_ib_create_cq_defs), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR, @@ -3766,12 +3882,18 @@ err: return err; } +static struct ib_device *mlx5_ib_add_sub_dev(struct ib_device *parent, + enum rdma_nl_dev_type type, + const char *name); +static void mlx5_ib_del_sub_dev(struct ib_device *sub_dev); + static const struct ib_device_ops mlx5_ib_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_MLX5, .uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION, .add_gid = mlx5_ib_add_gid, + .add_sub_dev = mlx5_ib_add_sub_dev, .alloc_mr = mlx5_ib_alloc_mr, .alloc_mr_integrity = mlx5_ib_alloc_mr_integrity, .alloc_pd = mlx5_ib_alloc_pd, @@ -3786,6 +3908,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .dealloc_pd = mlx5_ib_dealloc_pd, .dealloc_ucontext = mlx5_ib_dealloc_ucontext, .del_gid = mlx5_ib_del_gid, + .del_sub_dev = mlx5_ib_del_sub_dev, .dereg_mr = mlx5_ib_dereg_mr, .destroy_ah = mlx5_ib_destroy_ah, .destroy_cq = mlx5_ib_destroy_cq, @@ -4075,7 +4198,10 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) { const char *name; - if (!mlx5_lag_is_active(dev->mdev)) + if (dev->sub_dev_name) { + name = dev->sub_dev_name; + ib_mark_name_assigned_by_user(&dev->ib_dev); + } else if (!mlx5_lag_is_active(dev->mdev)) name = "mlx5_%d"; else name = "mlx5_bond_%d"; @@ -4086,6 +4212,7 @@ static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) { mlx5_mkey_cache_cleanup(dev); mlx5r_umr_resource_cleanup(dev); + mlx5r_umr_cleanup(dev); } static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) @@ -4097,7 +4224,7 @@ static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) { int ret; - ret = mlx5r_umr_resource_init(dev); + ret = mlx5r_umr_init(dev); if (ret) return ret; @@ -4335,6 +4462,89 @@ const struct mlx5_ib_profile raw_eth_profile = { NULL), }; +static const struct mlx5_ib_profile plane_profile = { + STAGE_CREATE(MLX5_IB_STAGE_INIT, + mlx5_ib_stage_init_init, + mlx5_ib_stage_init_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_CAPS, + mlx5_ib_stage_caps_init, + mlx5_ib_stage_caps_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, + mlx5_ib_stage_non_default_cb, + NULL), + STAGE_CREATE(MLX5_IB_STAGE_QP, + mlx5_init_qp_table, + mlx5_cleanup_qp_table), + STAGE_CREATE(MLX5_IB_STAGE_SRQ, + mlx5_init_srq_table, + 
mlx5_cleanup_srq_table), + STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, + mlx5_ib_dev_res_init, + mlx5_ib_dev_res_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_BFREG, + mlx5_ib_stage_bfrag_init, + mlx5_ib_stage_bfrag_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_IB_REG, + mlx5_ib_stage_ib_reg_init, + mlx5_ib_stage_ib_reg_cleanup), +}; + +static struct ib_device *mlx5_ib_add_sub_dev(struct ib_device *parent, + enum rdma_nl_dev_type type, + const char *name) +{ + struct mlx5_ib_dev *mparent = to_mdev(parent), *mplane; + enum rdma_link_layer ll; + int ret; + + if (mparent->smi_dev) + return ERR_PTR(-EEXIST); + + ll = mlx5_port_type_cap_to_rdma_ll(MLX5_CAP_GEN(mparent->mdev, + port_type)); + if (type != RDMA_DEVICE_TYPE_SMI || !mparent->num_plane || + ll != IB_LINK_LAYER_INFINIBAND || + !MLX5_CAP_GEN_2(mparent->mdev, multiplane_qp_ud)) + return ERR_PTR(-EOPNOTSUPP); + + mplane = ib_alloc_device(mlx5_ib_dev, ib_dev); + if (!mplane) + return ERR_PTR(-ENOMEM); + + mplane->port = kcalloc(mparent->num_plane * mparent->num_ports, + sizeof(*mplane->port), GFP_KERNEL); + if (!mplane->port) { + ret = -ENOMEM; + goto fail_kcalloc; + } + + mplane->ib_dev.type = type; + mplane->mdev = mparent->mdev; + mplane->num_ports = mparent->num_plane; + mplane->sub_dev_name = name; + + ret = __mlx5_ib_add(mplane, &plane_profile); + if (ret) + goto fail_ib_add; + + mparent->smi_dev = mplane; + return &mplane->ib_dev; + +fail_ib_add: + kfree(mplane->port); +fail_kcalloc: + ib_dealloc_device(&mplane->ib_dev); + return ERR_PTR(ret); +} + +static void mlx5_ib_del_sub_dev(struct ib_device *sub_dev) +{ + struct mlx5_ib_dev *mdev = to_mdev(sub_dev); + + to_mdev(sub_dev->parent)->smi_dev = NULL; + __mlx5_ib_remove(mdev, mdev->profile, MLX5_IB_STAGE_MAX); +} + static int mlx5r_mp_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) { @@ -4412,11 +4622,18 @@ static int mlx5r_probe(struct auxiliary_device *adev, dev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!dev) return -ENOMEM; + + if (ll == IB_LINK_LAYER_INFINIBAND) { + ret = mlx5_ib_get_plane_num(mdev, &dev->num_plane); + if (ret) + goto fail; + } + dev->port = kcalloc(num_ports, sizeof(*dev->port), GFP_KERNEL); if (!dev->port) { - ib_dealloc_device(&dev->ib_dev); - return -ENOMEM; + ret = -ENOMEM; + goto fail; } dev->mdev = mdev; @@ -4428,14 +4645,17 @@ static int mlx5r_probe(struct auxiliary_device *adev, profile = &pf_profile; ret = __mlx5_ib_add(dev, profile); - if (ret) { - kfree(dev->port); - ib_dealloc_device(&dev->ib_dev); - return ret; - } + if (ret) + goto fail_ib_add; auxiliary_set_drvdata(adev, dev); return 0; + +fail_ib_add: + kfree(dev->port); +fail: + ib_dealloc_device(&dev->ib_dev); + return ret; } static void mlx5r_remove(struct auxiliary_device *adev) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b68779e9d86c..d5eb1b726675 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -115,6 +115,19 @@ unsigned long __mlx5_umem_find_best_quantized_pgoff( __mlx5_bit_sz(typ, page_offset_fld), 0, scale, \ page_offset_quantized) +static inline unsigned long +mlx5_umem_dmabuf_find_best_pgsz(struct ib_umem_dmabuf *umem_dmabuf) +{ + /* + * mkeys used for dmabuf are fixed at PAGE_SIZE because we must be able + * to hold any sgl after a move operation. Ideally the mkc page size + * could be changed at runtime to be optimal, but right now the driver + * cannot do that. 
+ */ + return ib_umem_find_best_pgsz(&umem_dmabuf->umem, PAGE_SIZE, + umem_dmabuf->umem.iova); +} + enum { MLX5_IB_MMAP_OFFSET_START = 9, MLX5_IB_MMAP_OFFSET_END = 255, @@ -751,6 +764,8 @@ struct umr_common { */ struct mutex lock; unsigned int state; + /* Protects from repeat UMR QP creation */ + struct mutex init_lock; }; #define NUM_MKEYS_PER_PAGE \ @@ -822,11 +837,13 @@ struct mlx5_ib_port_resources { struct mlx5_ib_resources { struct ib_cq *c0; + struct mutex cq_lock; u32 xrcdn0; u32 xrcdn1; struct ib_pd *p0; struct ib_srq *s0; struct ib_srq *s1; + struct mutex srq_lock; struct mlx5_ib_port_resources ports[2]; }; @@ -1172,6 +1189,10 @@ struct mlx5_ib_dev { #ifdef CONFIG_MLX5_MACSEC struct mlx5_macsec macsec; #endif + + u8 num_plane; + struct mlx5_ib_dev *smi_dev; + const char *sub_dev_name; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -1270,6 +1291,8 @@ to_mmmap(struct rdma_user_mmap_entry *rdma_entry) struct mlx5_user_mmap_entry, rdma_entry); } +int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev); +int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev); int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, struct mlx5_db *db); void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db); @@ -1309,7 +1332,7 @@ int mlx5_ib_read_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer, size_t buflen, size_t *bc); int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, - struct ib_udata *udata); + struct uverbs_attr_bundle *attrs); int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); @@ -1509,6 +1532,7 @@ extern const struct uapi_definition mlx5_ib_devx_defs[]; extern const struct uapi_definition mlx5_ib_flow_defs[]; extern const struct uapi_definition mlx5_ib_qos_defs[]; extern const struct uapi_definition mlx5_ib_std_types_defs[]; +extern const struct uapi_definition mlx5_ib_create_cq_defs[]; static inline int is_qp1(enum ib_qp_type qp_type) { @@ -1677,4 +1701,10 @@ static inline bool mlx5_umem_needs_ats(struct mlx5_ib_dev *dev, int set_roce_addr(struct mlx5_ib_dev *dev, u32 port_num, unsigned int index, const union ib_gid *gid, const struct ib_gid_attr *attr); + +static inline u32 smi_to_native_portnum(struct mlx5_ib_dev *dev, u32 port) +{ + return (port - 1) / dev->num_ports + 1; +} + #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index d3c1f63791a2..98bd8eaa393e 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1470,6 +1470,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct ib_umem *umem; + int err; if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) return ERR_PTR(-EOPNOTSUPP); @@ -1477,6 +1478,10 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", start, iova, length, access_flags); + err = mlx5r_umr_resource_init(dev); + if (err) + return ERR_PTR(err); + if (access_flags & IB_ACCESS_ON_DEMAND) return create_user_odp_mr(pd, start, length, iova, access_flags, udata); @@ -1523,6 +1528,10 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset, "offset 0x%llx, 
virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x\n", offset, virt_addr, length, fd, access_flags); + err = mlx5r_umr_resource_init(dev); + if (err) + return ERR_PTR(err); + /* dmabuf requires xlt update via umr to work. */ if (!mlx5r_umr_can_load_pas(dev, length)) return ERR_PTR(-EINVAL); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 4a04cbc5b78a..a524181f34df 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -705,10 +705,8 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt, return err; } - page_size = mlx5_umem_find_best_pgsz(&umem_dmabuf->umem, mkc, - log_page_size, 0, - umem_dmabuf->umem.iova); - if (unlikely(page_size < PAGE_SIZE)) { + page_size = mlx5_umem_dmabuf_find_best_pgsz(umem_dmabuf); + if (!page_size) { ib_umem_dmabuf_unmap_pages(umem_dmabuf); err = -EINVAL; } else { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index e8c0fead4062..e39b1a101e97 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3234,6 +3234,10 @@ int mlx5_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr, enum ib_qp_type type; int err; + err = mlx5_ib_dev_res_srq_init(dev); + if (err) + return err; + err = check_qp_type(dev, attr, &type); if (err) return err; @@ -4213,7 +4217,12 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, /* todo implement counter_index functionality */ - if (is_sqp(qp->type)) + if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI && is_qp0(qp->type)) { + MLX5_SET(ads, pri_path, vhca_port_num, + smi_to_native_portnum(dev, qp->port)); + if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) + MLX5_SET(ads, pri_path, plane_index, qp->port); + } else if (is_sqp(qp->type)) MLX5_SET(ads, pri_path, vhca_port_num, qp->port); if (attr_mask & IB_QP_PORT) diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index d9cf6982d645..d3dcc272200a 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -249,7 +249,8 @@ int mlx5_qpc_create_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp, if (err) goto err_cmd; - mlx5_debug_qp_add(dev->mdev, qp); + if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI) + mlx5_debug_qp_add(dev->mdev, qp); return 0; @@ -307,7 +308,8 @@ int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp) { u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {}; - mlx5_debug_qp_remove(dev->mdev, qp); + if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI) + mlx5_debug_qp_remove(dev->mdev, qp); destroy_resource_common(dev, qp); @@ -504,7 +506,9 @@ int mlx5_init_qp_table(struct mlx5_ib_dev *dev) spin_lock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); xa_init(&table->dct_xa); - mlx5_qp_debugfs_init(dev->mdev); + + if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI) + mlx5_qp_debugfs_init(dev->mdev); table->nb.notifier_call = rsc_event_notifier; mlx5_notifier_register(dev->mdev, &table->nb); @@ -517,7 +521,8 @@ void mlx5_cleanup_qp_table(struct mlx5_ib_dev *dev) struct mlx5_qp_table *table = &dev->qp_table; mlx5_notifier_unregister(dev->mdev, &table->nb); - mlx5_qp_debugfs_cleanup(dev->mdev); + if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI) + mlx5_qp_debugfs_cleanup(dev->mdev); } int mlx5_core_qp_query(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp, diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 84be0c3d5699..bcb6b324af50 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ 
-216,6 +216,10 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq, return -EINVAL; } + err = mlx5_ib_dev_res_cq_init(dev); + if (err) + return err; + mutex_init(&srq->mutex); spin_lock_init(&srq->lock); srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1); diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c index e76142f6fa88..ffc31b01f690 100644 --- a/drivers/infiniband/hw/mlx5/umr.c +++ b/drivers/infiniband/hw/mlx5/umr.c @@ -135,22 +135,28 @@ static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp) int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev) { struct ib_qp_init_attr init_attr = {}; - struct ib_pd *pd; struct ib_cq *cq; struct ib_qp *qp; - int ret; + int ret = 0; - pd = ib_alloc_pd(&dev->ib_dev, 0); - if (IS_ERR(pd)) { - mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); - return PTR_ERR(pd); - } + + /* + * UMR qp is set once, never changed until device unload. + * Avoid taking the mutex if initialization is already done. + */ + if (dev->umrc.qp) + return 0; + + mutex_lock(&dev->umrc.init_lock); + /* First user allocates the UMR resources. Skip if already allocated. */ + if (dev->umrc.qp) + goto unlock; cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); if (IS_ERR(cq)) { mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); ret = PTR_ERR(cq); - goto destroy_pd; + goto unlock; } init_attr.send_cq = cq; @@ -160,7 +166,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev) init_attr.cap.max_send_sge = 1; init_attr.qp_type = MLX5_IB_QPT_REG_UMR; init_attr.port_num = 1; - qp = ib_create_qp(pd, &init_attr); + qp = ib_create_qp(dev->umrc.pd, &init_attr); if (IS_ERR(qp)) { mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); ret = PTR_ERR(qp); @@ -171,22 +177,22 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev) if (ret) goto destroy_qp; - dev->umrc.qp = qp; dev->umrc.cq = cq; - dev->umrc.pd = pd; sema_init(&dev->umrc.sem, MAX_UMR_WR); mutex_init(&dev->umrc.lock); dev->umrc.state = MLX5_UMR_STATE_ACTIVE; + dev->umrc.qp = qp; + mutex_unlock(&dev->umrc.init_lock); return 0; destroy_qp: ib_destroy_qp(qp); destroy_cq: ib_free_cq(cq); -destroy_pd: - ib_dealloc_pd(pd); +unlock: + mutex_unlock(&dev->umrc.init_lock); return ret; } @@ -194,8 +200,31 @@ void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev) { if (dev->umrc.state == MLX5_UMR_STATE_UNINIT) return; + mutex_destroy(&dev->umrc.lock); + /* After device init, UMR cp/qp are not unset during the lifetime. */ ib_destroy_qp(dev->umrc.qp); ib_free_cq(dev->umrc.cq); +} + +int mlx5r_umr_init(struct mlx5_ib_dev *dev) +{ + struct ib_pd *pd; + + pd = ib_alloc_pd(&dev->ib_dev, 0); + if (IS_ERR(pd)) { + mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); + return PTR_ERR(pd); + } + dev->umrc.pd = pd; + + mutex_init(&dev->umrc.init_lock); + + return 0; +} + +void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev) +{ + mutex_destroy(&dev->umrc.init_lock); ib_dealloc_pd(dev->umrc.pd); } diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h index 3799bb758e49..5f734dc72bef 100644 --- a/drivers/infiniband/hw/mlx5/umr.h +++ b/drivers/infiniband/hw/mlx5/umr.h @@ -16,6 +16,9 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev); void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev); +int mlx5r_umr_init(struct mlx5_ib_dev *dev); +void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev); + static inline bool mlx5r_umr_can_load_pas(struct mlx5_ib_dev *dev, size_t length) { |
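For reference, the smi_to_native_portnum() helper added to mlx5_ib.h maps a port number on the SMI (plane) sub-device back to the native port of the parent device; per the mlx5_ib_add_sub_dev() hunk, the sub-device's num_ports is set to the parent's plane count. The standalone model below only exercises that arithmetic with an assumed configuration of two planes per native port; the struct and values are illustrative, not part of the driver.

#include <stdio.h>

/* Simplified stand-in carrying the one field the mapping uses. */
struct smi_dev {
	unsigned int num_ports;	/* on the SMI sub-device: the plane count */
};

/* Same arithmetic as the kernel helper in mlx5_ib.h. */
static unsigned int smi_to_native_portnum(const struct smi_dev *dev,
					  unsigned int port)
{
	return (port - 1) / dev->num_ports + 1;
}

int main(void)
{
	struct smi_dev dev = { .num_ports = 2 };	/* assumed: 2 planes */
	unsigned int port;

	for (port = 1; port <= 4; port++)
		printf("SMI port %u -> native port %u\n",
		       port, smi_to_native_portnum(&dev, port));
	/* With two planes: SMI ports 1,2 -> native 1; ports 3,4 -> native 2. */
	return 0;
}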