From 52b4bdd28c861e7331543f4b5a0853b80c9fd3fa Mon Sep 17 00:00:00 2001 From: Yuanyuan Zhong Date: Thu, 29 Jun 2023 15:32:48 -0600 Subject: RDMA/mlx5: align MR mem allocation size to power-of-two The MR memory allocation requests extra bytes to guarantee that there is enough space to find the memory aligned to MLX5_UMR_ALIGN. For power-of-two sizes, the alignment can be guaranteed by kmalloc() according to commit 59bb47985c1d ("mm, sl[aou]b: guarantee natural alignment for kmalloc(power-of-two)"). So if target alignment is power-of-two and adding the extra bytes crosses a power-of-two boundary, use the next power-of-two as the allocation size. Signed-off-by: Yuanyuan Zhong Link: https://lore.kernel.org/r/20230629213248.3184245-2-yzhong@purestorage.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 2017ede100a6..92f35fafb2c0 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1766,6 +1766,11 @@ mlx5_alloc_priv_descs(struct ib_device *device, int ret; add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0); + if (is_power_of_2(MLX5_UMR_ALIGN) && add_size) { + int end = max_t(int, MLX5_UMR_ALIGN, roundup_pow_of_two(size)); + + add_size = min_t(int, end - size, add_size); + } mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL); if (!mr->descs_alloc) -- cgit From 113383eff3ff6f6ea6fcddeb469d10d21c8e3d35 Mon Sep 17 00:00:00 2001 From: Michael Margolin Date: Mon, 3 Jul 2023 15:34:04 +0000 Subject: RDMA/efa: Add RDMA write HW statistics counters Update device API and request RDMA write counters if RDMA write is supported by device. Expose newly added counters through ib core counters mechanism. 
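Once registered through the ib core hw-stats mechanism, the new rdma_write_* counters sit next to the existing rdma_read_* ones and can be read per port from sysfs. A minimal userspace sketch, assuming the standard hw_counters sysfs layout; the device name "efa_0" and port 1 are placeholders:

#include <stdio.h>

int main(void)
{
	/* Path layout assumed from the ib core hw-stats convention. */
	const char *path =
		"/sys/class/infiniband/efa_0/ports/1/hw_counters/rdma_write_bytes";
	unsigned long long val;
	FILE *f = fopen(path, "r");

	if (!f)
		return 1;
	if (fscanf(f, "%llu", &val) == 1)
		printf("rdma_write_bytes: %llu\n", val);
	fclose(f);
	return 0;
}

The same pattern applies to rdma_write_wrs, rdma_write_wr_err and rdma_write_recv_bytes; the values are only refreshed when the device reports the RDMA write capability, mirroring the EFA_DEV_CAP() check in the patch below.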
Reviewed-by: Daniel Kranzdorf Reviewed-by: Yonatan Nachum Signed-off-by: Michael Margolin Link: https://lore.kernel.org/r/20230703153404.30877-1-mrgolin@amazon.com Reviewed-by: Gal Pressman Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 13 +++++++++++++ drivers/infiniband/hw/efa/efa_com_cmd.c | 8 +++++++- drivers/infiniband/hw/efa/efa_com_cmd.h | 10 +++++++++- drivers/infiniband/hw/efa/efa_verbs.c | 18 ++++++++++++++++++ 4 files changed, 47 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 4e93ef7f84ee..9c65bd27bae0 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -66,6 +66,7 @@ enum efa_admin_get_stats_type { EFA_ADMIN_GET_STATS_TYPE_BASIC = 0, EFA_ADMIN_GET_STATS_TYPE_MESSAGES = 1, EFA_ADMIN_GET_STATS_TYPE_RDMA_READ = 2, + EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE = 3, }; enum efa_admin_get_stats_scope { @@ -570,6 +571,16 @@ struct efa_admin_rdma_read_stats { u64 read_resp_bytes; }; +struct efa_admin_rdma_write_stats { + u64 write_wrs; + + u64 write_bytes; + + u64 write_wr_err; + + u64 write_recv_bytes; +}; + struct efa_admin_acq_get_stats_resp { struct efa_admin_acq_common_desc acq_common_desc; @@ -579,6 +590,8 @@ struct efa_admin_acq_get_stats_resp { struct efa_admin_messages_stats messages_stats; struct efa_admin_rdma_read_stats rdma_read_stats; + + struct efa_admin_rdma_write_stats rdma_write_stats; } u; }; diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index 8f8885e002ba..576811885d59 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include "efa_com.h" @@ -794,6 +794,12 @@ int efa_com_get_stats(struct efa_com_dev *edev, result->rdma_read_stats.read_wr_err = resp.u.rdma_read_stats.read_wr_err; result->rdma_read_stats.read_resp_bytes = resp.u.rdma_read_stats.read_resp_bytes; break; + case EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE: + result->rdma_write_stats.write_wrs = resp.u.rdma_write_stats.write_wrs; + result->rdma_write_stats.write_bytes = resp.u.rdma_write_stats.write_bytes; + result->rdma_write_stats.write_wr_err = resp.u.rdma_write_stats.write_wr_err; + result->rdma_write_stats.write_recv_bytes = resp.u.rdma_write_stats.write_recv_bytes; + break; } return 0; diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 0898ad5bc340..fc97f37bb39b 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #ifndef _EFA_COM_CMD_H_ @@ -262,10 +262,18 @@ struct efa_com_rdma_read_stats { u64 read_resp_bytes; }; +struct efa_com_rdma_write_stats { + u64 write_wrs; + u64 write_bytes; + u64 write_wr_err; + u64 write_recv_bytes; +}; + union efa_com_get_stats_result { struct efa_com_basic_stats basic_stats; struct efa_com_messages_stats messages_stats; struct efa_com_rdma_read_stats rdma_read_stats; + struct efa_com_rdma_write_stats rdma_write_stats; }; int efa_com_create_qp(struct efa_com_dev *edev, diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 2a195c4b0f17..7a27d79c0541 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -61,6 +61,10 @@ struct efa_user_mmap_entry { op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \ op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \ op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \ + op(EFA_RDMA_WRITE_WRS, "rdma_write_wrs") \ + op(EFA_RDMA_WRITE_BYTES, "rdma_write_bytes") \ + op(EFA_RDMA_WRITE_WR_ERR, "rdma_write_wr_err") \ + op(EFA_RDMA_WRITE_RECV_BYTES, "rdma_write_recv_bytes") \ #define EFA_STATS_ENUM(ename, name) ename, #define EFA_STATS_STR(ename, nam) \ @@ -2080,6 +2084,7 @@ static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats, { struct efa_com_get_stats_params params = {}; union efa_com_get_stats_result result; + struct efa_com_rdma_write_stats *rws; struct efa_com_rdma_read_stats *rrs; struct efa_com_messages_stats *ms; struct efa_com_basic_stats *bs; @@ -2121,6 +2126,19 @@ static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats, stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err; stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes; + if (EFA_DEV_CAP(dev, RDMA_WRITE)) { + params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE; + err = efa_com_get_stats(&dev->edev, ¶ms, &result); + if (err) + return err; + + rws = &result.rdma_write_stats; + stats->value[EFA_RDMA_WRITE_WRS] = rws->write_wrs; + stats->value[EFA_RDMA_WRITE_BYTES] = rws->write_bytes; + stats->value[EFA_RDMA_WRITE_WR_ERR] = rws->write_wr_err; + stats->value[EFA_RDMA_WRITE_RECV_BYTES] = rws->write_recv_bytes; + } + return ARRAY_SIZE(efa_port_stats_descs); } -- cgit From 65e02e840847158c7ee48ca8e6e91062b0f78662 Mon Sep 17 00:00:00 2001 From: Minjie Du Date: Thu, 6 Jul 2023 10:27:03 +0800 Subject: RDMA/qedr: Remove a duplicate assignment in irdma_query_ah() Delete a duplicate statement from this function implementation. 
Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Minjie Du Acked-by: Alok Prasad Link: https://lore.kernel.org/r/20230706022704.1260-1-duminjie@vivo.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 9c4fe4fa9001..a8326a95d186 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -4424,7 +4424,6 @@ static int irdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) ah_attr->grh.traffic_class = ah->sc_ah.ah_info.tc_tos; ah_attr->grh.hop_limit = ah->sc_ah.ah_info.hop_ttl; ah_attr->grh.sgid_index = ah->sgid_index; - ah_attr->grh.sgid_index = ah->sgid_index; memcpy(&ah_attr->grh.dgid, &ah->dgid, sizeof(ah_attr->grh.dgid)); } -- cgit From f877f22ac1e9bf1f9aded3765b0012851e1dc4c5 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 11 Jul 2023 12:53:18 -0500 Subject: RDMA/irdma: Implement egress VLAN priority When a VLAN interface is in use, get and use the VLAN egress mapping. Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230711175318.1301-1-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/cm.c | 66 +++++++++++++++++++++++++++++++++++-- drivers/infiniband/hw/irdma/verbs.c | 45 ++++++++++++++++++++----- 2 files changed, 99 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 70009b970e08..6b71b67ce9ff 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -1555,6 +1555,41 @@ static int irdma_del_multiple_qhash(struct irdma_device *iwdev, return ret; } +static u8 irdma_iw_get_vlan_prio(u32 *loc_addr, u8 prio, bool ipv4) +{ + struct net_device *ndev = NULL; + + rcu_read_lock(); + if (ipv4) { + ndev = ip_dev_find(&init_net, htonl(loc_addr[0])); + } else { + struct net_device *ip_dev; + struct in6_addr laddr6; + + irdma_copy_ip_htonl(laddr6.in6_u.u6_addr32, loc_addr); + + for_each_netdev_rcu (&init_net, ip_dev) { + if (ipv6_chk_addr(&init_net, &laddr6, ip_dev, 1)) { + ndev = ip_dev; + break; + } + } + } + + if (!ndev) + goto done; + if (is_vlan_dev(ndev)) + prio = (vlan_dev_get_egress_qos_mask(ndev, prio) & VLAN_PRIO_MASK) + >> VLAN_PRIO_SHIFT; + if (ipv4) + dev_put(ndev); + +done: + rcu_read_unlock(); + + return prio; +} + /** * irdma_netdev_vlan_ipv6 - Gets the netdev and mac * @addr: local IPv6 address @@ -1667,6 +1702,12 @@ static int irdma_add_mqh_6(struct irdma_device *iwdev, ifp->addr.in6_u.u6_addr32); memcpy(cm_info->loc_addr, child_listen_node->loc_addr, sizeof(cm_info->loc_addr)); + if (!iwdev->vsi.dscp_mode) + cm_info->user_pri = + irdma_iw_get_vlan_prio(child_listen_node->loc_addr, + cm_info->user_pri, + false); + ret = irdma_manage_qhash(iwdev, cm_info, IRDMA_QHASH_TYPE_TCP_SYN, IRDMA_QHASH_MANAGE_TYPE_ADD, @@ -1751,6 +1792,11 @@ static int irdma_add_mqh_4(struct irdma_device *iwdev, ntohl(ifa->ifa_address); memcpy(cm_info->loc_addr, child_listen_node->loc_addr, sizeof(cm_info->loc_addr)); + if (!iwdev->vsi.dscp_mode) + cm_info->user_pri = + irdma_iw_get_vlan_prio(child_listen_node->loc_addr, + cm_info->user_pri, + true); ret = irdma_manage_qhash(iwdev, cm_info, IRDMA_QHASH_TYPE_TCP_SYN, IRDMA_QHASH_MANAGE_TYPE_ADD, @@ -2219,6 +2265,10 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct 
irdma_device *iwdev, } else { cm_node->tos = max(listener->tos, cm_info->tos); cm_node->user_pri = rt_tos2priority(cm_node->tos); + cm_node->user_pri = + irdma_iw_get_vlan_prio(cm_info->loc_addr, + cm_node->user_pri, + cm_info->ipv4); } ibdev_dbg(&iwdev->ibdev, "DCB: listener: TOS:[%d] UP:[%d]\n", cm_node->tos, @@ -3832,11 +3882,15 @@ int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_info.cm_id = cm_id; cm_info.qh_qpid = iwdev->vsi.ilq->qp_id; cm_info.tos = cm_id->tos; - if (iwdev->vsi.dscp_mode) + if (iwdev->vsi.dscp_mode) { cm_info.user_pri = iwqp->sc_qp.vsi->dscp_map[irdma_tos2dscp(cm_info.tos)]; - else + } else { cm_info.user_pri = rt_tos2priority(cm_id->tos); + cm_info.user_pri = irdma_iw_get_vlan_prio(cm_info.loc_addr, + cm_info.user_pri, + cm_info.ipv4); + } if (iwqp->sc_qp.dev->ws_add(iwqp->sc_qp.vsi, cm_info.user_pri)) return -ENOMEM; @@ -3980,7 +4034,7 @@ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog) cm_listen_node->tos = cm_id->tos; if (iwdev->vsi.dscp_mode) cm_listen_node->user_pri = - iwdev->vsi.dscp_map[irdma_tos2dscp(cm_id->tos)]; + iwdev->vsi.dscp_map[irdma_tos2dscp(cm_id->tos)]; else cm_listen_node->user_pri = rt_tos2priority(cm_id->tos); cm_info.user_pri = cm_listen_node->user_pri; @@ -3990,6 +4044,12 @@ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog) if (err) goto error; } else { + if (!iwdev->vsi.dscp_mode) + cm_listen_node->user_pri = + irdma_iw_get_vlan_prio(cm_info.loc_addr, + cm_info.user_pri, + cm_info.ipv4); + cm_info.user_pri = cm_listen_node->user_pri; err = irdma_manage_qhash(iwdev, &cm_info, IRDMA_QHASH_TYPE_TCP_SYN, IRDMA_QHASH_MANAGE_TYPE_ADD, diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index a8326a95d186..a7b82aea4d08 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1098,6 +1098,24 @@ static int irdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, return 0; } +static u8 irdma_roce_get_vlan_prio(const struct ib_gid_attr *attr, u8 prio) +{ + struct net_device *ndev; + + rcu_read_lock(); + ndev = rcu_dereference(attr->ndev); + if (!ndev) + goto exit; + if (is_vlan_dev(ndev)) { + u16 vlan_qos = vlan_dev_get_egress_qos_mask(ndev, prio); + + prio = (vlan_qos & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + } +exit: + rcu_read_unlock(); + return prio; +} + /** * irdma_modify_qp_roce - modify qp request * @ibqp: qp's pointer for modify @@ -1174,7 +1192,8 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_AV) { struct irdma_av *av = &iwqp->roce_ah.av; - const struct ib_gid_attr *sgid_attr; + const struct ib_gid_attr *sgid_attr = + attr->ah_attr.grh.sgid_attr; u16 vlan_id = VLAN_N_VID; u32 local_ip[4]; @@ -1189,17 +1208,22 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, roce_info->dest_qp); irdma_qp_rem_qos(&iwqp->sc_qp); dev->ws_remove(iwqp->sc_qp.vsi, ctx_info->user_pri); - ctx_info->user_pri = rt_tos2priority(udp_info->tos); - iwqp->sc_qp.user_pri = ctx_info->user_pri; - if (dev->ws_add(iwqp->sc_qp.vsi, ctx_info->user_pri)) - return -ENOMEM; - irdma_qp_add_qos(&iwqp->sc_qp); + if (iwqp->sc_qp.vsi->dscp_mode) + ctx_info->user_pri = + iwqp->sc_qp.vsi->dscp_map[irdma_tos2dscp(udp_info->tos)]; + else + ctx_info->user_pri = rt_tos2priority(udp_info->tos); } - sgid_attr = attr->ah_attr.grh.sgid_attr; ret = rdma_read_gid_l2_fields(sgid_attr, &vlan_id, ctx_info->roce_info->mac_addr); if (ret) return ret; + ctx_info->user_pri = 
irdma_roce_get_vlan_prio(sgid_attr, + ctx_info->user_pri); + if (dev->ws_add(iwqp->sc_qp.vsi, ctx_info->user_pri)) + return -ENOMEM; + iwqp->sc_qp.user_pri = ctx_info->user_pri; + irdma_qp_add_qos(&iwqp->sc_qp); if (vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode) vlan_id = 0; @@ -4261,9 +4285,12 @@ static int irdma_setup_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr) ah_info->vlan_tag = 0; if (ah_info->vlan_tag < VLAN_N_VID) { + u8 prio = rt_tos2priority(ah_info->tc_tos); + + prio = irdma_roce_get_vlan_prio(sgid_attr, prio); + + ah_info->vlan_tag |= (u16)prio << VLAN_PRIO_SHIFT; ah_info->insert_vlan_tag = true; - ah_info->vlan_tag |= - rt_tos2priority(ah_info->tc_tos) << VLAN_PRIO_SHIFT; } return 0; -- cgit From b3d2b014b259ba758d72d7026685091bde1cf2d6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 18 Jul 2023 21:38:09 +0200 Subject: RDMA/irdma: Fix building without IPv6 The new irdma_iw_get_vlan_prio() function requires IPv6 support to build: x86_64-linux-ld: drivers/infiniband/hw/irdma/cm.o: in function `irdma_iw_get_vlan_prio': cm.c:(.text+0x2832): undefined reference to `ipv6_chk_addr' Add a compile-time check in the same way as elsewhere in this file to avoid this by conditionally leaving out the ipv6 specific bits. Fixes: f877f22ac1e9b ("RDMA/irdma: Implement egress VLAN priority") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230718193835.3546684-1-arnd@kernel.org Acked-by: Shiraz Saleem Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 6b71b67ce9ff..8ea55c6a3fba 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -1562,7 +1562,7 @@ static u8 irdma_iw_get_vlan_prio(u32 *loc_addr, u8 prio, bool ipv4) rcu_read_lock(); if (ipv4) { ndev = ip_dev_find(&init_net, htonl(loc_addr[0])); - } else { + } else if (IS_ENABLED(CONFIG_IPV6)) { struct net_device *ip_dev; struct in6_addr laddr6; -- cgit From c619af83277872465cb74e7a351556fba97d85e8 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 27 Jun 2023 16:43:20 +0200 Subject: RDMA/erdma: use vmalloc_array and vcalloc Use vmalloc_array and vcalloc to protect against multiplication overflows. The changes were done using the following Coccinelle semantic patch: // @initialize:ocaml@ @@ let rename alloc = match alloc with "vmalloc" -> "vmalloc_array" | "vzalloc" -> "vcalloc" | _ -> failwith "unknown" @@ size_t e1,e2; constant C1, C2; expression E1, E2, COUNT, x1, x2, x3; typedef u8; typedef __u8; type t = {u8,__u8,char,unsigned char}; identifier alloc = {vmalloc,vzalloc}; fresh identifier realloc = script:ocaml(alloc) { rename alloc }; @@ ( alloc(x1*x2*x3) | alloc(C1 * C2) | alloc((sizeof(t)) * (COUNT), ...) 
| - alloc((e1) * (e2)) + realloc(e1, e2) | - alloc((e1) * (COUNT)) + realloc(COUNT, e1) | - alloc((E1) * (E2)) + realloc(E1, E2) ) // Link: https://lore.kernel.org/r/20230627144339.144478-6-Julia.Lawall@inria.fr Signed-off-by: Julia Lawall Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/erdma/erdma_verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 517676fbb8b1..fe0521f1536e 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -481,8 +481,8 @@ static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, dev->func_bar + (ERDMA_SDB_SHARED_PAGE_INDEX << PAGE_SHIFT); kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET; - kqp->swr_tbl = vmalloc(qp->attrs.sq_size * sizeof(u64)); - kqp->rwr_tbl = vmalloc(qp->attrs.rq_size * sizeof(u64)); + kqp->swr_tbl = vmalloc_array(qp->attrs.sq_size, sizeof(u64)); + kqp->rwr_tbl = vmalloc_array(qp->attrs.rq_size, sizeof(u64)); if (!kqp->swr_tbl || !kqp->rwr_tbl) goto err_out; -- cgit From 9191df0029266cd32ed8f47def22081e18f2d9b8 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 27 Jun 2023 16:43:29 +0200 Subject: RDMA/siw: use vmalloc_array and vcalloc Use vmalloc_array and vcalloc to protect against multiplication overflows. The changes were done using the following Coccinelle semantic patch: // @initialize:ocaml@ @@ let rename alloc = match alloc with "vmalloc" -> "vmalloc_array" | "vzalloc" -> "vcalloc" | _ -> failwith "unknown" @@ size_t e1,e2; constant C1, C2; expression E1, E2, COUNT, x1, x2, x3; typedef u8; typedef __u8; type t = {u8,__u8,char,unsigned char}; identifier alloc = {vmalloc,vzalloc}; fresh identifier realloc = script:ocaml(alloc) { rename alloc }; @@ ( alloc(x1*x2*x3) | alloc(C1 * C2) | alloc((sizeof(t)) * (COUNT), ...) 
| - alloc((e1) * (e2)) + realloc(e1, e2) | - alloc((e1) * (COUNT)) + realloc(COUNT, e1) | - alloc((E1) * (E2)) + realloc(E1, E2) ) // Link: https://lore.kernel.org/r/20230627144339.144478-15-Julia.Lawall@inria.fr Signed-off-by: Julia Lawall Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_qp.c | 4 ++-- drivers/infiniband/sw/siw/siw_verbs.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 81e9bbd9ebda..47d0197db9a1 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -204,7 +204,7 @@ static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) { if (irq_size) { irq_size = roundup_pow_of_two(irq_size); - qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe)); + qp->irq = vcalloc(irq_size, sizeof(struct siw_sqe)); if (!qp->irq) { qp->attrs.irq_size = 0; return -ENOMEM; @@ -212,7 +212,7 @@ static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) } if (orq_size) { orq_size = roundup_pow_of_two(orq_size); - qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe)); + qp->orq = vcalloc(orq_size, sizeof(struct siw_sqe)); if (!qp->orq) { qp->attrs.orq_size = 0; qp->attrs.irq_size = 0; diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 398ec13db624..296d839ee876 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -381,7 +381,7 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, if (udata) qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe)); else - qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe)); + qp->sendq = vcalloc(num_sqe, sizeof(struct siw_sqe)); if (qp->sendq == NULL) { rv = -ENOMEM; @@ -414,7 +414,7 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, qp->recvq = vmalloc_user(num_rqe * sizeof(struct siw_rqe)); else - qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe)); + qp->recvq = vcalloc(num_rqe, sizeof(struct siw_rqe)); if (qp->recvq == NULL) { rv = -ENOMEM; @@ -1624,7 +1624,7 @@ int siw_create_srq(struct ib_srq *base_srq, srq->recvq = vmalloc_user(srq->num_rqe * sizeof(struct siw_rqe)); else - srq->recvq = vzalloc(srq->num_rqe * sizeof(struct siw_rqe)); + srq->recvq = vcalloc(srq->num_rqe, sizeof(struct siw_rqe)); if (srq->recvq == NULL) { rv = -ENOMEM; -- cgit From 666f526b6dd1851184abc12f7901c813a097fa93 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 27 Jun 2023 16:43:34 +0200 Subject: RDMA/bnxt_re: use vmalloc_array and vcalloc Use vmalloc_array and vcalloc to protect against multiplication overflows. The changes were done using the following Coccinelle semantic patch: // @initialize:ocaml@ @@ let rename alloc = match alloc with "vmalloc" -> "vmalloc_array" | "vzalloc" -> "vcalloc" | _ -> failwith "unknown" @@ size_t e1,e2; constant C1, C2; expression E1, E2, COUNT, x1, x2, x3; typedef u8; typedef __u8; type t = {u8,__u8,char,unsigned char}; identifier alloc = {vmalloc,vzalloc}; fresh identifier realloc = script:ocaml(alloc) { rename alloc }; @@ ( alloc(x1*x2*x3) | alloc(C1 * C2) | alloc((sizeof(t)) * (COUNT), ...) 
| - alloc((e1) * (e2)) + realloc(e1, e2) | - alloc((e1) * (COUNT)) + realloc(COUNT, e1) | - alloc((E1) * (E2)) + realloc(E1, E2) ) // Link: https://lore.kernel.org/r/20230627144339.144478-20-Julia.Lawall@inria.fr Signed-off-by: Julia Lawall Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_res.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 5fd8f7c90bb0..d47764c38461 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -118,11 +118,11 @@ static int __alloc_pbl(struct bnxt_qplib_res *res, else pages = sginfo->npages; /* page ptr arrays */ - pbl->pg_arr = vmalloc(pages * sizeof(void *)); + pbl->pg_arr = vmalloc_array(pages, sizeof(void *)); if (!pbl->pg_arr) return -ENOMEM; - pbl->pg_map_arr = vmalloc(pages * sizeof(dma_addr_t)); + pbl->pg_map_arr = vmalloc_array(pages, sizeof(dma_addr_t)); if (!pbl->pg_map_arr) { vfree(pbl->pg_arr); pbl->pg_arr = NULL; -- cgit From bad5b6e34ffbaacc77ad28a0f482e33b3929e635 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 17 Jul 2023 11:12:12 -0400 Subject: RDMA/siw: Fabricate a GID on tun and loopback devices LOOPBACK and NONE (tunnel) devices have all-zero MAC addresses. Currently, siw_device_create() falls back to copying the IB device's name in those cases, because an all-zero MAC address breaks the RDMA core address resolution mechanism. However, at the point when siw_device_create() constructs a GID, the ib_device::name field is uninitialized, leaving the MAC address to remain in an all-zero state. Fabricate a random artificial GID for such devices, and ensure this artificial GID is returned for all device query operations. 
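Because the fabricated GID is what siw_query_gid() and siw_query_device() now return, a quick way to confirm the behavior from userspace is to query GID index 0 on the siw device and check that it is no longer all-zero. A hedged sketch using standard libibverbs calls; picking the first device in the list is purely illustrative:

#include <stdio.h>
#include <string.h>
#include <infiniband/verbs.h>

int main(void)
{
	int num;
	struct ibv_device **list = ibv_get_device_list(&num);
	struct ibv_context *ctx;
	union ibv_gid gid, zero;

	if (!list || !num)
		return 1;
	ctx = ibv_open_device(list[0]);	/* assume the siw device is first */
	if (!ctx)
		return 1;
	if (ibv_query_gid(ctx, 1, 0, &gid))
		return 1;
	memset(&zero, 0, sizeof(zero));
	printf("GID[0] is %s\n",
	       memcmp(&gid, &zero, sizeof(gid)) ? "non-zero" : "all-zero");
	ibv_close_device(ctx);
	ibv_free_device_list(list);
	return 0;
}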
Link: https://lore.kernel.org/r/168960673260.3007.12378736853793339110.stgit@manet.1015granger.net Reported-by: Tom Talpey Fixes: a2d36b02c15d ("RDMA/siw: Enable siw on tunnel devices") Reviewed-by: Bernard Metzler Reviewed-by: Tom Talpey Signed-off-by: Chuck Lever Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw.h | 1 + drivers/infiniband/sw/siw/siw_main.c | 22 ++++++++-------------- drivers/infiniband/sw/siw/siw_verbs.c | 4 ++-- 3 files changed, 11 insertions(+), 16 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 2f3a9cda3850..8b4a710b82bc 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -74,6 +74,7 @@ struct siw_device { u32 vendor_part_id; int numa_node; + char raw_gid[ETH_ALEN]; /* physical port state (only one port per device) */ enum ib_port_state state; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 65b5cda5457b..f45600d169ae 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -75,8 +75,7 @@ static int siw_device_register(struct siw_device *sdev, const char *name) return rv; } - siw_dbg(base_dev, "HWaddr=%pM\n", sdev->netdev->dev_addr); - + siw_dbg(base_dev, "HWaddr=%pM\n", sdev->raw_gid); return 0; } @@ -313,24 +312,19 @@ static struct siw_device *siw_device_create(struct net_device *netdev) return NULL; base_dev = &sdev->base_dev; - sdev->netdev = netdev; - if (netdev->type != ARPHRD_LOOPBACK && netdev->type != ARPHRD_NONE) { - addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, - netdev->dev_addr); + if (netdev->addr_len) { + memcpy(sdev->raw_gid, netdev->dev_addr, + min_t(unsigned int, netdev->addr_len, ETH_ALEN)); } else { /* - * This device does not have a HW address, - * but connection mangagement lib expects gid != 0 + * This device does not have a HW address, but + * connection mangagement requires a unique gid. */ - size_t len = min_t(size_t, strlen(base_dev->name), 6); - char addr[6] = { }; - - memcpy(addr, base_dev->name, len); - addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, - addr); + eth_random_addr(sdev->raw_gid); } + addrconf_addr_eui48((u8 *)&base_dev->node_guid, sdev->raw_gid); base_dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND); diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 296d839ee876..fadfa70853f3 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -157,7 +157,7 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr, attr->vendor_part_id = sdev->vendor_part_id; addrconf_addr_eui48((u8 *)&attr->sys_image_guid, - sdev->netdev->dev_addr); + sdev->raw_gid); return 0; } @@ -218,7 +218,7 @@ int siw_query_gid(struct ib_device *base_dev, u32 port, int idx, /* subnet_prefix == interface_id == 0; */ memset(gid, 0, sizeof(*gid)); - memcpy(&gid->raw[0], sdev->netdev->dev_addr, 6); + memcpy(gid->raw, sdev->raw_gid, ETH_ALEN); return 0; } -- cgit From 448d15aab34293bf139f17c17910e854d9ad7d6c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 17 Jul 2023 11:12:19 -0400 Subject: RDMA/core: Set gid_attr.ndev for iWARP devices Have the iwarp side properly set the ndev in the device's sgid_attrs so that address resolution can treat it more like a RoCE device. 
Link: https://lore.kernel.org/r/168960673933.3007.8043081822081877578.stgit@manet.1015granger.net Suggested-by: Jason Gunthorpe Reviewed-by: Tom Talpey Signed-off-by: Chuck Lever Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 2e91d8879326..33f9d02f9b60 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1457,6 +1457,17 @@ static int config_non_roce_gid_cache(struct ib_device *device, i); goto err; } + + if (rdma_protocol_iwarp(device, port)) { + struct net_device *ndev; + + ndev = ib_device_get_netdev(device, port); + if (!ndev) + continue; + RCU_INIT_POINTER(gid_attr.ndev, ndev); + dev_put(ndev); + } + gid_attr.index = i; tprops->subnet_prefix = be64_to_cpu(gid_attr.gid.global.subnet_prefix); -- cgit From 700c96497ba9acf1a3554a3cd3ba6c79db3cbcf7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 17 Jul 2023 11:12:25 -0400 Subject: RDMA/cma: Deduplicate error flow in cma_validate_port() Clean up to prepare for the addition of new logic. Link: https://lore.kernel.org/r/168960674597.3007.6128252077812202526.stgit@manet.1015granger.net Signed-off-by: Chuck Lever Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 1ee87c3aaeab..da54167723d6 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -686,30 +686,31 @@ cma_validate_port(struct ib_device *device, u32 port, struct rdma_id_private *id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + const struct ib_gid_attr *sgid_attr = ERR_PTR(-ENODEV); int bound_if_index = dev_addr->bound_dev_if; - const struct ib_gid_attr *sgid_attr; int dev_type = dev_addr->dev_type; struct net_device *ndev = NULL; if (!rdma_dev_access_netns(device, id_priv->id.route.addr.dev_addr.net)) - return ERR_PTR(-ENODEV); + goto out; if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) - return ERR_PTR(-ENODEV); + goto out; if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) - return ERR_PTR(-ENODEV); + goto out; if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { ndev = dev_get_by_index(dev_addr->net, bound_if_index); if (!ndev) - return ERR_PTR(-ENODEV); + goto out; } else { gid_type = IB_GID_TYPE_IB; } sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev); dev_put(ndev); +out: return sgid_attr; } -- cgit From f8ef1be816bf9a0c406c696368c2264a9597a994 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 17 Jul 2023 11:12:32 -0400 Subject: RDMA/cma: Avoid GID lookups on iWARP devices We would like to enable the use of siw on top of a VPN that is constructed and managed via a tun device. That hasn't worked up until now because ARPHRD_NONE devices (such as tun devices) have no GID for the RDMA/core to look up. But it turns out that the egress device has already been picked for us -- no GID is necessary. addr_handler() just has to do the right thing with it. 
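The end goal is that an rdma_cm consumer can resolve a peer that is only reachable through the tun-backed VPN. A hedged sketch of that consumer path with librdmacm; the peer address is a placeholder and error paths are trimmed:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <rdma/rdma_cma.h>

int main(void)
{
	struct rdma_event_channel *ch = rdma_create_event_channel();
	struct rdma_cm_id *id;
	struct sockaddr_in dst = { .sin_family = AF_INET };

	if (!ch || rdma_create_id(ch, &id, NULL, RDMA_PS_TCP))
		return 1;
	inet_pton(AF_INET, "10.8.0.2", &dst.sin_addr);	/* peer on the VPN */
	/* With this change addr_handler() can bind the id to the siw
	 * device sitting on the tun netdev, which has no GID to look up.
	 */
	if (rdma_resolve_addr(id, NULL, (struct sockaddr *)&dst, 2000))
		return 1;
	/* ... wait for RDMA_CM_EVENT_ADDR_RESOLVED on ch, then
	 * rdma_resolve_route()/rdma_connect() as usual ...
	 */
	rdma_destroy_id(id);
	rdma_destroy_event_channel(ch);
	return 0;
}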
Link: https://lore.kernel.org/r/168960675257.3007.4737911174148394395.stgit@manet.1015granger.net Suggested-by: Jason Gunthorpe Signed-off-by: Chuck Lever Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index da54167723d6..8bd6cb867381 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -700,6 +700,27 @@ cma_validate_port(struct ib_device *device, u32 port, if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) goto out; + /* + * For drivers that do not associate more than one net device with + * their gid tables, such as iWARP drivers, it is sufficient to + * return the first table entry. + * + * Other driver classes might be included in the future. + */ + if (rdma_protocol_iwarp(device, port)) { + sgid_attr = rdma_get_gid_attr(device, port, 0); + if (IS_ERR(sgid_attr)) + goto out; + + rcu_read_lock(); + ndev = rcu_dereference(sgid_attr->ndev); + if (!net_eq(dev_net(ndev), dev_addr->net) || + ndev->ifindex != bound_if_index) + sgid_attr = ERR_PTR(-ENODEV); + rcu_read_unlock(); + goto out; + } + if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { ndev = dev_get_by_index(dev_addr->net, bound_if_index); if (!ndev) -- cgit From 586e613d37ec35572a332839973b9c3bccd0c545 Mon Sep 17 00:00:00 2001 From: Chandramohan Akula Date: Tue, 18 Jul 2023 22:02:53 -0700 Subject: RDMA/bnxt_re: Initialize Doorbell pacing feature Checks for pacing feature capability and get the doorbell pacing configuration using FW commands. Allocate a page and initialize the pacing parameters for the applications. Cleanup the page and de-initialize the pacing during device removal. 
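The page allocated here is the contract between the driver and the user library: consumers read the pacing parameters and the FIFO register layout from it and derive the current doorbell FIFO occupancy. A sketch of that derivation, assuming the consumer has already read the register value and can see the shared bnxt_qplib_db_pacing_data filled in by bnxt_re_initialize_dbr_pacing() below; the helper name is made up for illustration:

/* Illustrative only: mirrors the occupancy calculation the driver
 * itself uses in the pacing work added later in this series.
 */
static u32 db_fifo_occupancy(u32 read_val,
			     const struct bnxt_qplib_db_pacing_data *p)
{
	return p->fifo_max_depth -
	       ((read_val & p->fifo_room_mask) >> p->fifo_room_shift);
}

With the defaults programmed here, applications alert the driver once occupancy reaches alarm_th (twice the 250-entry pacing threshold), and the driver keeps pacing until occupancy drops back below pacing_th.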
Link: https://lore.kernel.org/r/1689742977-9128-4-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Chandramohan Akula Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 22 +++++++ drivers/infiniband/hw/bnxt_re/main.c | 96 +++++++++++++++++++++++++++++++ drivers/infiniband/hw/bnxt_re/qplib_res.h | 19 ++++++ 3 files changed, 137 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index ea81b2497511..1543f80a1b5c 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -112,6 +112,27 @@ struct bnxt_re_gsi_context { #define BNXT_RE_NQ_IDX 1 #define BNXT_RE_GEN_P5_MAX_VF 64 +struct bnxt_re_pacing { + u64 dbr_db_fifo_reg_off; + void *dbr_page; + u64 dbr_bar_addr; + u32 pacing_algo_th; + u32 do_pacing_save; + u32 dbq_pacing_time; /* ms */ + u32 dbr_def_do_pacing; + bool dbr_pacing; +}; + +#define BNXT_RE_DBR_PACING_TIME 5 /* ms */ +#define BNXT_RE_PACING_ALGO_THRESHOLD 250 /* Entries in DB FIFO */ +#define BNXT_RE_PACING_ALARM_TH_MULTIPLE 2 /* Multiple of pacing algo threshold */ +/* Default do_pacing value when there is no congestion */ +#define BNXT_RE_DBR_DO_PACING_NO_CONGESTION 0x7F /* 1 in 512 probability */ +#define BNXT_RE_DB_FIFO_ROOM_MASK 0x1FFF8000 +#define BNXT_RE_MAX_FIFO_DEPTH 0x2c00 +#define BNXT_RE_DB_FIFO_ROOM_SHIFT 15 +#define BNXT_RE_GRC_FIFO_REG_BASE 0x2000 + struct bnxt_re_dev { struct ib_device ibdev; struct list_head list; @@ -171,6 +192,7 @@ struct bnxt_re_dev { atomic_t nq_alloc_cnt; u32 is_virtfn; u32 num_vfs; + struct bnxt_re_pacing pacing; }; #define to_bnxt_re_dev(ptr, member) \ diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index b42166fe7454..13cd84d68e1f 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -432,9 +432,92 @@ int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev) return rc; cctx->modes.db_push = le32_to_cpu(resp.flags) & FUNC_QCAPS_RESP_FLAGS_WCB_PUSH_MODE; + cctx->modes.dbr_pacing = + le32_to_cpu(resp.flags_ext2) & FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_EXT_SUPPORTED ? 
+ true : false; return 0; } +static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev) +{ + struct hwrm_func_dbr_pacing_qcfg_output resp = {}; + struct hwrm_func_dbr_pacing_qcfg_input req = {}; + struct bnxt_en_dev *en_dev = rdev->en_dev; + struct bnxt_qplib_chip_ctx *cctx; + struct bnxt_fw_msg fw_msg = {}; + int rc; + + cctx = rdev->chip_ctx; + bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_DBR_PACING_QCFG); + bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, + sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); + rc = bnxt_send_msg(en_dev, &fw_msg); + if (rc) + return rc; + + if ((le32_to_cpu(resp.dbr_stat_db_fifo_reg) & + FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK) == + FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_GRC) + cctx->dbr_stat_db_fifo = + le32_to_cpu(resp.dbr_stat_db_fifo_reg) & + ~FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK; + return 0; +} + +/* Update the pacing tunable parameters to the default values */ +static void bnxt_re_set_default_pacing_data(struct bnxt_re_dev *rdev) +{ + struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data; + + pacing_data->do_pacing = rdev->pacing.dbr_def_do_pacing; + pacing_data->pacing_th = rdev->pacing.pacing_algo_th; + pacing_data->alarm_th = + pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE; +} + +static int bnxt_re_initialize_dbr_pacing(struct bnxt_re_dev *rdev) +{ + if (bnxt_re_hwrm_dbr_pacing_qcfg(rdev)) + return -EIO; + + /* Allocate a page for app use */ + rdev->pacing.dbr_page = (void *)__get_free_page(GFP_KERNEL); + if (!rdev->pacing.dbr_page) + return -ENOMEM; + + memset((u8 *)rdev->pacing.dbr_page, 0, PAGE_SIZE); + rdev->qplib_res.pacing_data = (struct bnxt_qplib_db_pacing_data *)rdev->pacing.dbr_page; + + /* MAP HW window 2 for reading db fifo depth */ + writel(rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_BASE_MASK, + rdev->en_dev->bar0 + BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4); + rdev->pacing.dbr_db_fifo_reg_off = + (rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_OFFSET_MASK) + + BNXT_RE_GRC_FIFO_REG_BASE; + rdev->pacing.dbr_bar_addr = + pci_resource_start(rdev->qplib_res.pdev, 0) + rdev->pacing.dbr_db_fifo_reg_off; + + rdev->pacing.pacing_algo_th = BNXT_RE_PACING_ALGO_THRESHOLD; + rdev->pacing.dbq_pacing_time = BNXT_RE_DBR_PACING_TIME; + rdev->pacing.dbr_def_do_pacing = BNXT_RE_DBR_DO_PACING_NO_CONGESTION; + rdev->pacing.do_pacing_save = rdev->pacing.dbr_def_do_pacing; + rdev->qplib_res.pacing_data->fifo_max_depth = BNXT_RE_MAX_FIFO_DEPTH; + rdev->qplib_res.pacing_data->fifo_room_mask = BNXT_RE_DB_FIFO_ROOM_MASK; + rdev->qplib_res.pacing_data->fifo_room_shift = BNXT_RE_DB_FIFO_ROOM_SHIFT; + rdev->qplib_res.pacing_data->grc_reg_offset = rdev->pacing.dbr_db_fifo_reg_off; + bnxt_re_set_default_pacing_data(rdev); + return 0; +} + +static void bnxt_re_deinitialize_dbr_pacing(struct bnxt_re_dev *rdev) +{ + if (rdev->pacing.dbr_page) + free_page((u64)rdev->pacing.dbr_page); + + rdev->pacing.dbr_page = NULL; + rdev->pacing.dbr_pacing = false; +} + static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id, int type) { @@ -1217,6 +1300,9 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev) if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) rdev->num_msix = 0; + if (rdev->pacing.dbr_pacing) + bnxt_re_deinitialize_dbr_pacing(rdev); + bnxt_re_destroy_chip_ctx(rdev); if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) bnxt_unregister_dev(rdev->en_dev); @@ -1309,6 +1395,16 @@ static int 
bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) goto free_ring; } + if (bnxt_qplib_dbr_pacing_en(rdev->chip_ctx)) { + rc = bnxt_re_initialize_dbr_pacing(rdev); + if (!rc) { + rdev->pacing.dbr_pacing = true; + } else { + ibdev_err(&rdev->ibdev, + "DBR pacing disabled with error : %d\n", rc); + rdev->pacing.dbr_pacing = false; + } + } rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr, rdev->is_virtfn); if (rc) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index d850a553821e..57161d303c25 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -48,6 +48,7 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero; struct bnxt_qplib_drv_modes { u8 wqe_mode; bool db_push; + bool dbr_pacing; }; struct bnxt_qplib_chip_ctx { @@ -58,6 +59,17 @@ struct bnxt_qplib_chip_ctx { u16 hwrm_cmd_max_timeout; struct bnxt_qplib_drv_modes modes; u64 hwrm_intf_ver; + u32 dbr_stat_db_fifo; +}; + +struct bnxt_qplib_db_pacing_data { + u32 do_pacing; + u32 pacing_th; + u32 alarm_th; + u32 fifo_max_depth; + u32 fifo_room_mask; + u32 fifo_room_shift; + u32 grc_reg_offset; }; #define BNXT_QPLIB_DBR_PF_DB_OFFSET 0x10000 @@ -271,6 +283,7 @@ struct bnxt_qplib_res { struct mutex dpi_tbl_lock; bool prio; bool is_vf; + struct bnxt_qplib_db_pacing_data *pacing_data; }; static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx) @@ -467,4 +480,10 @@ static inline bool _is_ext_stats_supported(u16 dev_cap_flags) return dev_cap_flags & CREQ_QUERY_FUNC_RESP_SB_EXT_STATS; } + +static inline u8 bnxt_qplib_dbr_pacing_en(struct bnxt_qplib_chip_ctx *cctx) +{ + return cctx->modes.dbr_pacing; +} + #endif /* __BNXT_QPLIB_RES_H__ */ -- cgit From fa8fad92ddddfc0cfb6fd9e9f645cf53a5ee78a6 Mon Sep 17 00:00:00 2001 From: Chandramohan Akula Date: Tue, 18 Jul 2023 22:02:54 -0700 Subject: RDMA/bnxt_re: Enable pacing support for the user apps Report the pacing capability to the user applications. Link: https://lore.kernel.org/r/1689742977-9128-5-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Chandramohan Akula Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index abef0b8baa7c..86b71c77b606 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -4075,6 +4075,8 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) goto cfail; } uctx->shpage_mmap = &entry->rdma_entry; + if (rdev->pacing.dbr_pacing) + resp.comp_mask |= BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED; rc = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); if (rc) { -- cgit From ea222485788208cd79bad42d25aae9232b33a934 Mon Sep 17 00:00:00 2001 From: Chandramohan Akula Date: Tue, 18 Jul 2023 22:02:55 -0700 Subject: RDMA/bnxt_re: Update alloc_page uapi for pacing Update the alloc_page uapi functionality for handling the mapping of doorbell pacing shared page and bar address. 
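On the library side, the new doorbell-pacing page requests hand back an mmap offset which the provider then maps through the verbs command fd. A rough sketch of that mapping, assuming cmd_fd and mmap_offset come from the opened context and the alloc_page response; the helper is hypothetical and real rdma-core provider plumbing differs:

#include <stddef.h>
#include <sys/types.h>
#include <sys/mman.h>

/* Hypothetical helper for mapping the shared pacing page. */
static void *map_pacing_page(int cmd_fd, off_t mmap_offset, size_t pg_sz)
{
	/* Read-only: the kernel side below rejects VM_WRITE for this page. */
	void *p = mmap(NULL, pg_sz, PROT_READ, MAP_SHARED, cmd_fd,
		       mmap_offset);

	return p == MAP_FAILED ? NULL : p;
}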
Link: https://lore.kernel.org/r/1689742977-9128-6-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Chandramohan Akula Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 35 +++++++++++++++++++++++++++++--- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 2 ++ 2 files changed, 34 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 86b71c77b606..0703163b397f 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -565,6 +565,8 @@ bnxt_re_mmap_entry_insert(struct bnxt_re_ucontext *uctx, u64 mem_offset, break; case BNXT_RE_MMAP_UC_DB: case BNXT_RE_MMAP_WC_DB: + case BNXT_RE_MMAP_DBR_BAR: + case BNXT_RE_MMAP_DBR_PAGE: ret = rdma_user_mmap_entry_insert(&uctx->ib_uctx, &entry->rdma_entry, PAGE_SIZE); break; @@ -4149,6 +4151,19 @@ int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma) case BNXT_RE_MMAP_SH_PAGE: ret = vm_insert_page(vma, vma->vm_start, virt_to_page(uctx->shpg)); break; + case BNXT_RE_MMAP_DBR_BAR: + pfn = bnxt_entry->mem_offset >> PAGE_SHIFT; + ret = rdma_user_mmap_io(ib_uctx, vma, pfn, PAGE_SIZE, + pgprot_noncached(vma->vm_page_prot), + rdma_entry); + break; + case BNXT_RE_MMAP_DBR_PAGE: + /* Driver doesn't expect write access for user space */ + if (vma->vm_flags & VM_WRITE) + return -EFAULT; + ret = vm_insert_page(vma, vma->vm_start, + virt_to_page((void *)bnxt_entry->mem_offset)); + break; default: ret = -EINVAL; break; @@ -4180,7 +4195,7 @@ static int UVERBS_HANDLER(BNXT_RE_METHOD_ALLOC_PAGE)(struct uverbs_attr_bundle * u64 mmap_offset; u32 length; u32 dpi; - u64 dbr; + u64 addr; int err; uctx = container_of(ib_uverbs_get_ucontext(attrs), struct bnxt_re_ucontext, ib_uctx); @@ -4202,19 +4217,30 @@ static int UVERBS_HANDLER(BNXT_RE_METHOD_ALLOC_PAGE)(struct uverbs_attr_bundle * return -ENOMEM; length = PAGE_SIZE; dpi = uctx->wcdpi.dpi; - dbr = (u64)uctx->wcdpi.umdbr; + addr = (u64)uctx->wcdpi.umdbr; mmap_flag = BNXT_RE_MMAP_WC_DB; } else { return -EINVAL; } break; + case BNXT_RE_ALLOC_DBR_BAR_PAGE: + length = PAGE_SIZE; + addr = (u64)rdev->pacing.dbr_bar_addr; + mmap_flag = BNXT_RE_MMAP_DBR_BAR; + break; + + case BNXT_RE_ALLOC_DBR_PAGE: + length = PAGE_SIZE; + addr = (u64)rdev->pacing.dbr_page; + mmap_flag = BNXT_RE_MMAP_DBR_PAGE; + break; default: return -EOPNOTSUPP; } - entry = bnxt_re_mmap_entry_insert(uctx, dbr, mmap_flag, &mmap_offset); + entry = bnxt_re_mmap_entry_insert(uctx, addr, mmap_flag, &mmap_offset); if (!entry) return -ENOMEM; @@ -4254,6 +4280,9 @@ static int alloc_page_obj_cleanup(struct ib_uobject *uobject, uctx->wcdpi.dbr = NULL; } break; + case BNXT_RE_MMAP_DBR_BAR: + case BNXT_RE_MMAP_DBR_PAGE: + break; default: goto exit; } diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 32d9e9d09791..f392a09b9e2c 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -146,6 +146,8 @@ enum bnxt_re_mmap_flag { BNXT_RE_MMAP_SH_PAGE, BNXT_RE_MMAP_UC_DB, BNXT_RE_MMAP_WC_DB, + BNXT_RE_MMAP_DBR_PAGE, + BNXT_RE_MMAP_DBR_BAR, }; struct bnxt_re_user_mmap_entry { -- cgit From 2ad4e6303a6d7518632739eaf67821a3553db1bd Mon Sep 17 00:00:00 2001 From: Chandramohan Akula Date: Tue, 18 Jul 2023 22:02:56 -0700 Subject: RDMA/bnxt_re: Implement doorbell pacing algorithm User applications alert the driver when the Doorbell FIFO reaches the alarm threshold. 
The driver updates the pacing parameters in the shared page to do the maximum pacing by the application till the DB FIFO congestion reduces to pacing threshold. Driver keeps checking the DB FIFO depth at the pacing interval and gradually adjusts the pacing level. Once the pacing level reaches default values (no congestion in the FIFO) pacing gets completed. Link: https://lore.kernel.org/r/1689742977-9128-7-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Chandramohan Akula Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 5 ++ drivers/infiniband/hw/bnxt_re/main.c | 124 ++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 1543f80a1b5c..2175103d570f 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -121,8 +121,10 @@ struct bnxt_re_pacing { u32 dbq_pacing_time; /* ms */ u32 dbr_def_do_pacing; bool dbr_pacing; + struct mutex dbq_lock; /* synchronize db pacing algo */ }; +#define BNXT_RE_MAX_DBR_DO_PACING 0xFFFF #define BNXT_RE_DBR_PACING_TIME 5 /* ms */ #define BNXT_RE_PACING_ALGO_THRESHOLD 250 /* Entries in DB FIFO */ #define BNXT_RE_PACING_ALARM_TH_MULTIPLE 2 /* Multiple of pacing algo threshold */ @@ -193,6 +195,8 @@ struct bnxt_re_dev { u32 is_virtfn; u32 num_vfs; struct bnxt_re_pacing pacing; + struct work_struct dbq_fifo_check_work; + struct delayed_work dbq_pacing_work; }; #define to_bnxt_re_dev(ptr, member) \ @@ -203,6 +207,7 @@ struct bnxt_re_dev { #define BNXT_RE_ROCEV2_IPV6_PACKET 3 #define BNXT_RE_CHECK_RC(x) ((x) && ((x) != -ETIMEDOUT)) +void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev); static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev) { diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 13cd84d68e1f..6469811003f6 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -475,6 +475,125 @@ static void bnxt_re_set_default_pacing_data(struct bnxt_re_dev *rdev) pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE; } +static void __wait_for_fifo_occupancy_below_th(struct bnxt_re_dev *rdev) +{ + u32 read_val, fifo_occup; + + /* loop shouldn't run infintely as the occupancy usually goes + * below pacing algo threshold as soon as pacing kicks in. 
+ */ + while (1) { + read_val = readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off); + fifo_occup = BNXT_RE_MAX_FIFO_DEPTH - + ((read_val & BNXT_RE_DB_FIFO_ROOM_MASK) >> + BNXT_RE_DB_FIFO_ROOM_SHIFT); + /* Fifo occupancy cannot be greater the MAX FIFO depth */ + if (fifo_occup > BNXT_RE_MAX_FIFO_DEPTH) + break; + + if (fifo_occup < rdev->qplib_res.pacing_data->pacing_th) + break; + } +} + +static void bnxt_re_db_fifo_check(struct work_struct *work) +{ + struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev, + dbq_fifo_check_work); + struct bnxt_qplib_db_pacing_data *pacing_data; + u32 pacing_save; + + if (!mutex_trylock(&rdev->pacing.dbq_lock)) + return; + pacing_data = rdev->qplib_res.pacing_data; + pacing_save = rdev->pacing.do_pacing_save; + __wait_for_fifo_occupancy_below_th(rdev); + cancel_delayed_work_sync(&rdev->dbq_pacing_work); + if (pacing_save > rdev->pacing.dbr_def_do_pacing) { + /* Double the do_pacing value during the congestion */ + pacing_save = pacing_save << 1; + } else { + /* + * when a new congestion is detected increase the do_pacing + * by 8 times. And also increase the pacing_th by 4 times. The + * reason to increase pacing_th is to give more space for the + * queue to oscillate down without getting empty, but also more + * room for the queue to increase without causing another alarm. + */ + pacing_save = pacing_save << 3; + pacing_data->pacing_th = rdev->pacing.pacing_algo_th * 4; + } + + if (pacing_save > BNXT_RE_MAX_DBR_DO_PACING) + pacing_save = BNXT_RE_MAX_DBR_DO_PACING; + + pacing_data->do_pacing = pacing_save; + rdev->pacing.do_pacing_save = pacing_data->do_pacing; + pacing_data->alarm_th = + pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE; + schedule_delayed_work(&rdev->dbq_pacing_work, + msecs_to_jiffies(rdev->pacing.dbq_pacing_time)); + mutex_unlock(&rdev->pacing.dbq_lock); +} + +static void bnxt_re_pacing_timer_exp(struct work_struct *work) +{ + struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev, + dbq_pacing_work.work); + struct bnxt_qplib_db_pacing_data *pacing_data; + u32 read_val, fifo_occup; + + if (!mutex_trylock(&rdev->pacing.dbq_lock)) + return; + + pacing_data = rdev->qplib_res.pacing_data; + read_val = readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off); + fifo_occup = BNXT_RE_MAX_FIFO_DEPTH - + ((read_val & BNXT_RE_DB_FIFO_ROOM_MASK) >> + BNXT_RE_DB_FIFO_ROOM_SHIFT); + + if (fifo_occup > pacing_data->pacing_th) + goto restart_timer; + + /* + * Instead of immediately going back to the default do_pacing + * reduce it by 1/8 times and restart the timer. + */ + pacing_data->do_pacing = pacing_data->do_pacing - (pacing_data->do_pacing >> 3); + pacing_data->do_pacing = max_t(u32, rdev->pacing.dbr_def_do_pacing, pacing_data->do_pacing); + if (pacing_data->do_pacing <= rdev->pacing.dbr_def_do_pacing) { + bnxt_re_set_default_pacing_data(rdev); + goto dbq_unlock; + } + +restart_timer: + schedule_delayed_work(&rdev->dbq_pacing_work, + msecs_to_jiffies(rdev->pacing.dbq_pacing_time)); +dbq_unlock: + rdev->pacing.do_pacing_save = pacing_data->do_pacing; + mutex_unlock(&rdev->pacing.dbq_lock); +} + +void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev) +{ + struct bnxt_qplib_db_pacing_data *pacing_data; + + if (!rdev->pacing.dbr_pacing) + return; + mutex_lock(&rdev->pacing.dbq_lock); + pacing_data = rdev->qplib_res.pacing_data; + + /* + * Increase the alarm_th to max so that other user lib instances do not + * keep alerting the driver. 
+ */ + pacing_data->alarm_th = BNXT_RE_MAX_FIFO_DEPTH; + pacing_data->do_pacing = BNXT_RE_MAX_DBR_DO_PACING; + cancel_work_sync(&rdev->dbq_fifo_check_work); + schedule_work(&rdev->dbq_fifo_check_work); + mutex_unlock(&rdev->pacing.dbq_lock); +} + static int bnxt_re_initialize_dbr_pacing(struct bnxt_re_dev *rdev) { if (bnxt_re_hwrm_dbr_pacing_qcfg(rdev)) @@ -506,11 +625,16 @@ static int bnxt_re_initialize_dbr_pacing(struct bnxt_re_dev *rdev) rdev->qplib_res.pacing_data->fifo_room_shift = BNXT_RE_DB_FIFO_ROOM_SHIFT; rdev->qplib_res.pacing_data->grc_reg_offset = rdev->pacing.dbr_db_fifo_reg_off; bnxt_re_set_default_pacing_data(rdev); + /* Initialize worker for DBR Pacing */ + INIT_WORK(&rdev->dbq_fifo_check_work, bnxt_re_db_fifo_check); + INIT_DELAYED_WORK(&rdev->dbq_pacing_work, bnxt_re_pacing_timer_exp); return 0; } static void bnxt_re_deinitialize_dbr_pacing(struct bnxt_re_dev *rdev) { + cancel_work_sync(&rdev->dbq_fifo_check_work); + cancel_delayed_work_sync(&rdev->dbq_pacing_work); if (rdev->pacing.dbr_page) free_page((u64)rdev->pacing.dbr_page); -- cgit From 61a8118f60e9dde64be4f3a6e07c15014a8bfbd2 Mon Sep 17 00:00:00 2001 From: Chandramohan Akula Date: Tue, 18 Jul 2023 22:02:57 -0700 Subject: RDMA/bnxt_re: Add a new uapi for driver notification Add driver notify uapi for application notifying the driver about the doorbell FIFO congestion. Link: https://lore.kernel.org/r/1689742977-9128-8-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Chandramohan Akula Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 0703163b397f..ec4d163f3f52 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -4183,6 +4183,15 @@ void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry) kfree(bnxt_entry); } +static int UVERBS_HANDLER(BNXT_RE_METHOD_NOTIFY_DRV)(struct uverbs_attr_bundle *attrs) +{ + struct bnxt_re_ucontext *uctx; + + uctx = container_of(ib_uverbs_get_ucontext(attrs), struct bnxt_re_ucontext, ib_uctx); + bnxt_re_pacing_alert(uctx->rdev); + return 0; +} + static int UVERBS_HANDLER(BNXT_RE_METHOD_ALLOC_PAGE)(struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, BNXT_RE_ALLOC_PAGE_HANDLE); @@ -4320,7 +4329,13 @@ DECLARE_UVERBS_NAMED_OBJECT(BNXT_RE_OBJECT_ALLOC_PAGE, &UVERBS_METHOD(BNXT_RE_METHOD_ALLOC_PAGE), &UVERBS_METHOD(BNXT_RE_METHOD_DESTROY_PAGE)); +DECLARE_UVERBS_NAMED_METHOD(BNXT_RE_METHOD_NOTIFY_DRV); + +DECLARE_UVERBS_GLOBAL_METHODS(BNXT_RE_OBJECT_NOTIFY_DRV, + &UVERBS_METHOD(BNXT_RE_METHOD_NOTIFY_DRV)); + const struct uapi_definition bnxt_re_uapi_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_ALLOC_PAGE), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_NOTIFY_DRV), {} }; -- cgit From 2f5833ead7ea332889cfbdd4ac7b84279fdd3cef Mon Sep 17 00:00:00 2001 From: Minjie Du Date: Wed, 5 Jul 2023 18:39:50 +0800 Subject: RDMA/qedr: Remove a duplicate assignment in qedr_create_gsi_qp() Delete a duplicate statement from this function implementation. 
Signed-off-by: Minjie Du Link: https://lore.kernel.org/r/20230705103950.15225-1-duminjie@vivo.com Acked-by: Alok Prasad Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/qedr/qedr_roce_cm.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c index 05307c1488b8..859f66a51bd2 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c @@ -354,7 +354,6 @@ int qedr_create_gsi_qp(struct qedr_dev *dev, struct ib_qp_init_attr *attrs, /* the GSI CQ is handled by the driver so remove it from the FW */ qedr_destroy_gsi_cq(dev, attrs); dev->gsi_rqcq->cq_type = QEDR_CQ_TYPE_GSI; - dev->gsi_rqcq->cq_type = QEDR_CQ_TYPE_GSI; DP_DEBUG(dev, QEDR_MSG_GSI, "created GSI QP %p\n", qp); -- cgit From 44725a87381353075273618eeedc9127e99c378e Mon Sep 17 00:00:00 2001 From: Minjie Du Date: Wed, 5 Jul 2023 11:18:49 +0800 Subject: RDMA/qedr: Remove duplicate assignments of va Avoid double assignment of iwqp->ietf_mem.va. Signed-off-by: Minjie Du Link: https://lore.kernel.org/r/20230705031849.2443-1-duminjie@vivo.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/cm.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 8ea55c6a3fba..70017048d7d1 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -3627,7 +3627,6 @@ void irdma_free_lsmm_rsrc(struct irdma_qp *iwqp) iwqp->ietf_mem.size, iwqp->ietf_mem.va, iwqp->ietf_mem.pa); iwqp->ietf_mem.va = NULL; - iwqp->ietf_mem.va = NULL; } } -- cgit From f5a61344ed23e5c5786c399a4c2d0e18af17014a Mon Sep 17 00:00:00 2001 From: Luoyouming Date: Fri, 21 Jul 2023 10:51:45 +0800 Subject: RDMA/hns: Support get XRCD number from firmware Support driver get the num of XRCD from firmware. 
Signed-off-by: Luoyouming Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20230721025146.450831-2-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 5 ++--- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8f7eb11066b4..8427e8d319b7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2075,9 +2075,6 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev) caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - caps->num_xrcds = HNS_ROCE_V2_MAX_XRCD_NUM; - caps->reserved_xrcds = HNS_ROCE_V2_RSV_XRCD_NUM; - caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS; caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS; @@ -2200,6 +2197,7 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev) caps->num_cqs = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_CQS); caps->gid_table_len[0] = hr_reg_read(resp_c, PF_CAPS_C_MAX_GID); caps->max_cqes = 1 << hr_reg_read(resp_c, PF_CAPS_C_CQ_DEPTH); + caps->num_xrcds = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_XRCDS); caps->num_mtpts = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_MRWS); caps->num_qps = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_QPS); caps->max_qp_init_rdma = hr_reg_read(resp_c, PF_CAPS_C_MAX_ORD); @@ -2220,6 +2218,7 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev) caps->reserved_mrws = hr_reg_read(resp_e, PF_CAPS_E_RSV_MRWS); caps->chunk_sz = 1 << hr_reg_read(resp_e, PF_CAPS_E_CHUNK_SIZE_SHIFT); caps->reserved_cqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_CQS); + caps->reserved_xrcds = hr_reg_read(resp_e, PF_CAPS_E_RSV_XRCDS); caps->reserved_srqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_SRQS); caps->reserved_lkey = hr_reg_read(resp_e, PF_CAPS_E_RSV_LKEYS); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 7033eae2407c..2b87f0cf06ec 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -42,7 +42,6 @@ #define HNS_ROCE_V2_MAX_SRQWQE_SEGS 0x1000000 #define HNS_ROCE_V2_MAX_IDX_SEGS 0x1000000 #define HNS_ROCE_V2_MAX_XRCD_NUM 0x1000000 -#define HNS_ROCE_V2_RSV_XRCD_NUM 0 #define HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08 10 @@ -1202,6 +1201,7 @@ struct hns_roce_query_pf_caps_c { #define PF_CAPS_C_NUM_CQS PF_CAPS_C_FIELD_LOC(51, 32) #define PF_CAPS_C_MAX_GID PF_CAPS_C_FIELD_LOC(60, 52) #define PF_CAPS_C_CQ_DEPTH PF_CAPS_C_FIELD_LOC(86, 64) +#define PF_CAPS_C_NUM_XRCDS PF_CAPS_C_FIELD_LOC(91, 87) #define PF_CAPS_C_NUM_MRWS PF_CAPS_C_FIELD_LOC(115, 96) #define PF_CAPS_C_NUM_QPS PF_CAPS_C_FIELD_LOC(147, 128) #define PF_CAPS_C_MAX_ORD PF_CAPS_C_FIELD_LOC(155, 148) @@ -1260,6 +1260,7 @@ struct hns_roce_query_pf_caps_e { #define PF_CAPS_E_RSV_MRWS PF_CAPS_E_FIELD_LOC(19, 0) #define PF_CAPS_E_CHUNK_SIZE_SHIFT PF_CAPS_E_FIELD_LOC(31, 20) #define PF_CAPS_E_RSV_CQS PF_CAPS_E_FIELD_LOC(51, 32) +#define PF_CAPS_E_RSV_XRCDS PF_CAPS_E_FIELD_LOC(63, 52) #define PF_CAPS_E_RSV_SRQS PF_CAPS_E_FIELD_LOC(83, 64) #define PF_CAPS_E_RSV_LKEYS PF_CAPS_E_FIELD_LOC(115, 96) -- cgit From 0b5eed06832c87275ee67f69a943d811b1fe066d Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Fri, 21 Jul 2023 10:51:46 +0800 Subject: RDMA/hns: Remove VF extend configuration Remove VF extend configuration since the relative registers are configured in 
firmware currently. Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20230721025146.450831-3-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 - drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 84 ++++------------------------- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 10 ---- 3 files changed, 10 insertions(+), 85 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 84239b907de2..6084c1649000 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -714,7 +714,6 @@ struct hns_roce_caps { u32 max_rq_sg; u32 rsv0; u32 num_qps; - u32 num_pi_qps; u32 reserved_qps; u32 num_srqs; u32 max_wqes; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8427e8d319b7..30451cef5376 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1680,29 +1680,6 @@ static int load_func_res_caps(struct hns_roce_dev *hr_dev, bool is_vf) return 0; } -static int load_ext_cfg_caps(struct hns_roce_dev *hr_dev, bool is_vf) -{ - struct hns_roce_cmq_desc desc; - struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data; - struct hns_roce_caps *caps = &hr_dev->caps; - u32 func_num, qp_num; - int ret; - - hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_EXT_CFG, true); - ret = hns_roce_cmq_send(hr_dev, &desc, 1); - if (ret) - return ret; - - func_num = is_vf ? 1 : max_t(u32, 1, hr_dev->func_num); - qp_num = hr_reg_read(req, EXT_CFG_QP_PI_NUM) / func_num; - caps->num_pi_qps = round_down(qp_num, HNS_ROCE_QP_BANK_NUM); - - qp_num = hr_reg_read(req, EXT_CFG_QP_NUM) / func_num; - caps->num_qps = round_down(qp_num, HNS_ROCE_QP_BANK_NUM); - - return 0; -} - static int load_pf_timer_res_caps(struct hns_roce_dev *hr_dev) { struct hns_roce_cmq_desc desc; @@ -1723,50 +1700,37 @@ static int load_pf_timer_res_caps(struct hns_roce_dev *hr_dev) return 0; } -static int query_func_resource_caps(struct hns_roce_dev *hr_dev, bool is_vf) +static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) { struct device *dev = hr_dev->dev; int ret; - ret = load_func_res_caps(hr_dev, is_vf); + ret = load_func_res_caps(hr_dev, false); if (ret) { - dev_err(dev, "failed to load res caps, ret = %d (%s).\n", ret, - is_vf ? "vf" : "pf"); + dev_err(dev, "failed to load pf res caps, ret = %d.\n", ret); return ret; } - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { - ret = load_ext_cfg_caps(hr_dev, is_vf); - if (ret) - dev_err(dev, "failed to load ext cfg, ret = %d (%s).\n", - ret, is_vf ? 
"vf" : "pf"); - } + ret = load_pf_timer_res_caps(hr_dev); + if (ret) + dev_err(dev, "failed to load pf timer resource, ret = %d.\n", + ret); return ret; } -static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) +static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev) { struct device *dev = hr_dev->dev; int ret; - ret = query_func_resource_caps(hr_dev, false); + ret = load_func_res_caps(hr_dev, true); if (ret) - return ret; - - ret = load_pf_timer_res_caps(hr_dev); - if (ret) - dev_err(dev, "failed to load pf timer resource, ret = %d.\n", - ret); + dev_err(dev, "failed to load vf res caps, ret = %d.\n", ret); return ret; } -static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev) -{ - return query_func_resource_caps(hr_dev, true); -} - static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev, u32 vf_id) { @@ -1849,24 +1813,6 @@ static int config_vf_hem_resource(struct hns_roce_dev *hr_dev, int vf_id) return hns_roce_cmq_send(hr_dev, desc, 2); } -static int config_vf_ext_resource(struct hns_roce_dev *hr_dev, u32 vf_id) -{ - struct hns_roce_cmq_desc desc; - struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data; - struct hns_roce_caps *caps = &hr_dev->caps; - - hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_EXT_CFG, false); - - hr_reg_write(req, EXT_CFG_VF_ID, vf_id); - - hr_reg_write(req, EXT_CFG_QP_PI_NUM, caps->num_pi_qps); - hr_reg_write(req, EXT_CFG_QP_PI_IDX, vf_id * caps->num_pi_qps); - hr_reg_write(req, EXT_CFG_QP_NUM, caps->num_qps); - hr_reg_write(req, EXT_CFG_QP_IDX, vf_id * caps->num_qps); - - return hns_roce_cmq_send(hr_dev, &desc, 1); -} - static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) { u32 func_num = max_t(u32, 1, hr_dev->func_num); @@ -1881,16 +1827,6 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) vf_id, ret); return ret; } - - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { - ret = config_vf_ext_resource(hr_dev, vf_id); - if (ret) { - dev_err(hr_dev->dev, - "failed to config vf-%u ext res, ret = %d.\n", - vf_id, ret); - return ret; - } - } } return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 2b87f0cf06ec..d9693f6cc802 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -219,7 +219,6 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_QUERY_VF_RES = 0x850e, HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f, HNS_ROCE_OPC_CFG_GMV_BT = 0x8510, - HNS_ROCE_OPC_EXT_CFG = 0x8512, HNS_ROCE_QUERY_RAM_ECC = 0x8513, HNS_SWITCH_PARAMETER_CFG = 0x1033, }; @@ -956,15 +955,6 @@ struct hns_roce_func_clear { #define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL 40 #define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT 20 -/* Fields of HNS_ROCE_OPC_EXT_CFG */ -#define EXT_CFG_VF_ID CMQ_REQ_FIELD_LOC(31, 0) -#define EXT_CFG_QP_PI_IDX CMQ_REQ_FIELD_LOC(45, 32) -#define EXT_CFG_QP_PI_NUM CMQ_REQ_FIELD_LOC(63, 48) -#define EXT_CFG_QP_NUM CMQ_REQ_FIELD_LOC(87, 64) -#define EXT_CFG_QP_IDX CMQ_REQ_FIELD_LOC(119, 96) -#define EXT_CFG_LLM_IDX CMQ_REQ_FIELD_LOC(139, 128) -#define EXT_CFG_LLM_NUM CMQ_REQ_FIELD_LOC(156, 144) - #define CFG_LLM_A_BA_L CMQ_REQ_FIELD_LOC(31, 0) #define CFG_LLM_A_BA_H CMQ_REQ_FIELD_LOC(63, 32) #define CFG_LLM_A_DEPTH CMQ_REQ_FIELD_LOC(76, 64) -- cgit From 24b1b5d85c1c1e1c0eb7b6d7b6986ecb6c80041d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 22 Jul 2023 18:47:24 +0200 Subject: IB/hfi1: Use struct_size() Use struct_size() instead of hand-writing it, when 
allocating a structure with a flex array. This is less verbose, more robust and more informative. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/f4618a67d5ae0a30eb3f2b4558c8cc790feed79a.1690044376.git.christophe.jaillet@wanadoo.fr Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/pio.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 62e7dc9bea7b..dfea53e0fdeb 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1893,9 +1893,7 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts) vl_scontexts[i] = sc_per_vl + (extra > 0 ? 1 : 0); } /* build new map */ - newmap = kzalloc(sizeof(*newmap) + - roundup_pow_of_two(num_vls) * - sizeof(struct pio_map_elem *), + newmap = kzalloc(struct_size(newmap, map, roundup_pow_of_two(num_vls)), GFP_KERNEL); if (!newmap) goto bail; @@ -1910,9 +1908,8 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts) int sz = roundup_pow_of_two(vl_scontexts[i]); /* only allocate once */ - newmap->map[i] = kzalloc(sizeof(*newmap->map[i]) + - sz * sizeof(struct - send_context *), + newmap->map[i] = kzalloc(struct_size(newmap->map[i], + ksc, sz), GFP_KERNEL); if (!newmap->map[i]) goto bail; -- cgit From 8cfc99dada35b8889f76fbe28115dcde1a6f0874 Mon Sep 17 00:00:00 2001 From: Sindhu Devale Date: Tue, 25 Jul 2023 10:55:02 -0500 Subject: RDMA/irdma: Drop a local in irdma_sc_get_next_aeqe Drop the local wqe_idx in irdma_sc_get_next_aeqe and instead store the wqe_idx in the info structure for all asynchronous events(AE) received. There is no reason it should be tied to a specific AE source. 
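A minimal, standalone illustration of that idea - decode the per-event word once into the info structure, with the WQE index treated like any other field rather than being filled in per source. The bit layout here is invented, not the hardware's:

#include <stdint.h>
#include <stdio.h>

struct aeqe_info {
	uint32_t qp_cq_id;
	uint16_t wqe_idx;
	uint8_t  ae_src;
};

/* Hypothetical field layout of one 64-bit AEQE word. */
#define AEQE_AESRC_SHIFT	50
#define AEQE_AESRC_MASK		0xfULL
#define AEQE_WQEIDX_SHIFT	32
#define AEQE_WQEIDX_MASK	0x7fffULL
#define AEQE_ID_MASK		0x3ffffULL

static void decode_aeqe(uint64_t word, struct aeqe_info *info)
{
	info->ae_src   = (word >> AEQE_AESRC_SHIFT) & AEQE_AESRC_MASK;
	info->wqe_idx  = (word >> AEQE_WQEIDX_SHIFT) & AEQE_WQEIDX_MASK; /* unconditional */
	info->qp_cq_id = word & AEQE_ID_MASK;
}

int main(void)
{
	struct aeqe_info info;

	decode_aeqe(0x0004000700000123ULL, &info);
	printf("src=%u wqe_idx=%u id=%u\n", (unsigned)info.ae_src,
	       (unsigned)info.wqe_idx, (unsigned)info.qp_cq_id);
	return 0;
}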
Signed-off-by: Sindhu Devale Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230725155505.1069-2-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/ctrl.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index d88c9184007e..b90abdc85057 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -4004,7 +4004,6 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, { u64 temp, compl_ctx; __le64 *aeqe; - u16 wqe_idx; u8 ae_src; u8 polarity; @@ -4020,7 +4019,7 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, aeqe, 16, false); ae_src = (u8)FIELD_GET(IRDMA_AEQE_AESRC, temp); - wqe_idx = (u16)FIELD_GET(IRDMA_AEQE_WQDESCIDX, temp); + info->wqe_idx = (u16)FIELD_GET(IRDMA_AEQE_WQDESCIDX, temp); info->qp_cq_id = (u32)FIELD_GET(IRDMA_AEQE_QPCQID_LOW, temp) | ((u32)FIELD_GET(IRDMA_AEQE_QPCQID_HI, temp) << 18); info->ae_id = (u16)FIELD_GET(IRDMA_AEQE_AECODE, temp); @@ -4103,7 +4102,6 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, case IRDMA_AE_SOURCE_RQ_0011: info->qp = true; info->rq = true; - info->wqe_idx = wqe_idx; info->compl_ctx = compl_ctx; break; case IRDMA_AE_SOURCE_CQ: @@ -4117,7 +4115,6 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, case IRDMA_AE_SOURCE_SQ_0111: info->qp = true; info->sq = true; - info->wqe_idx = wqe_idx; info->compl_ctx = compl_ctx; break; case IRDMA_AE_SOURCE_IN_RR_WR: -- cgit From 133b1cba46c6c8b67c630eacc0a1e4969da16517 Mon Sep 17 00:00:00 2001 From: Sindhu Devale Date: Tue, 25 Jul 2023 10:55:03 -0500 Subject: RDMA/irdma: Refactor error handling in create CQP In case of a failure in irdma_create_cqp, do not call irdma_destroy_cqp, but clean up all the allocated resources in reverse order. Drop the extra argument in irdma_destroy_cqp as it's no longer needed.
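In other words, each allocation point gets its own unwind label and only the resources already set up are released, in reverse order - the usual kernel goto-unwind idiom. A standalone userspace sketch of the same shape, with invented names:

#include <stdlib.h>

struct cqp_like {
	void *requests;
	void *scratch;
	void *sq;
};

/* Each failure jumps to a label that frees only what already exists,
 * in reverse order of allocation. */
static int create_cqp_like(struct cqp_like *c, size_t n)
{
	c->requests = calloc(n, 64);
	if (!c->requests)
		return -1;

	c->scratch = calloc(n, 8);
	if (!c->scratch)
		goto err_scratch;

	c->sq = malloc(n * 128);
	if (!c->sq)
		goto err_sq;

	return 0;

err_sq:
	free(c->scratch);
	c->scratch = NULL;
err_scratch:
	free(c->requests);
	c->requests = NULL;
	return -1;
}

int main(void)
{
	struct cqp_like c = { 0 };
	int ret = create_cqp_like(&c, 16);

	if (!ret) {
		free(c.sq);
		free(c.scratch);
		free(c.requests);
	}
	return ret ? 1 : 0;
}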
Signed-off-by: Krzysztof Czurylo Signed-off-by: Sindhu Devale Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230725155505.1069-3-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/hw.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 795f7fd4f257..369eb6b6536d 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -567,7 +567,7 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf, * Issue destroy cqp request and * free the resources associated with the cqp */ -static void irdma_destroy_cqp(struct irdma_pci_f *rf, bool free_hwcqp) +static void irdma_destroy_cqp(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_cqp *cqp = &rf->cqp; @@ -575,8 +575,8 @@ static void irdma_destroy_cqp(struct irdma_pci_f *rf, bool free_hwcqp) if (rf->cqp_cmpl_wq) destroy_workqueue(rf->cqp_cmpl_wq); - if (free_hwcqp) - status = irdma_sc_cqp_destroy(dev->cqp); + + status = irdma_sc_cqp_destroy(dev->cqp); if (status) ibdev_dbg(to_ibdev(dev), "ERR: Destroy CQP failed %d\n", status); @@ -920,8 +920,8 @@ static int irdma_create_cqp(struct irdma_pci_f *rf) cqp->scratch_array = kcalloc(sqsize, sizeof(*cqp->scratch_array), GFP_KERNEL); if (!cqp->scratch_array) { - kfree(cqp->cqp_requests); - return -ENOMEM; + status = -ENOMEM; + goto err_scratch; } dev->cqp = &cqp->sc_cqp; @@ -931,15 +931,14 @@ static int irdma_create_cqp(struct irdma_pci_f *rf) cqp->sq.va = dma_alloc_coherent(dev->hw->device, cqp->sq.size, &cqp->sq.pa, GFP_KERNEL); if (!cqp->sq.va) { - kfree(cqp->scratch_array); - kfree(cqp->cqp_requests); - return -ENOMEM; + status = -ENOMEM; + goto err_sq; } status = irdma_obj_aligned_mem(rf, &mem, sizeof(struct irdma_cqp_ctx), IRDMA_HOST_CTX_ALIGNMENT_M); if (status) - goto exit; + goto err_ctx; dev->cqp->host_ctx_pa = mem.pa; dev->cqp->host_ctx = mem.va; @@ -965,7 +964,7 @@ static int irdma_create_cqp(struct irdma_pci_f *rf) status = irdma_sc_cqp_init(dev->cqp, &cqp_init_info); if (status) { ibdev_dbg(to_ibdev(dev), "ERR: cqp init status %d\n", status); - goto exit; + goto err_ctx; } spin_lock_init(&cqp->req_lock); @@ -976,7 +975,7 @@ static int irdma_create_cqp(struct irdma_pci_f *rf) ibdev_dbg(to_ibdev(dev), "ERR: cqp create failed - status %d maj_err %d min_err %d\n", status, maj_err, min_err); - goto exit; + goto err_ctx; } INIT_LIST_HEAD(&cqp->cqp_avail_reqs); @@ -990,8 +989,16 @@ static int irdma_create_cqp(struct irdma_pci_f *rf) init_waitqueue_head(&cqp->remove_wq); return 0; -exit: - irdma_destroy_cqp(rf, false); +err_ctx: + dma_free_coherent(dev->hw->device, cqp->sq.size, + cqp->sq.va, cqp->sq.pa); + cqp->sq.va = NULL; +err_sq: + kfree(cqp->scratch_array); + cqp->scratch_array = NULL; +err_scratch: + kfree(cqp->cqp_requests); + cqp->cqp_requests = NULL; return status; } @@ -1746,7 +1753,7 @@ void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf) rf->reset, rf->rdma_ver); fallthrough; case CQP_CREATED: - irdma_destroy_cqp(rf, true); + irdma_destroy_cqp(rf); fallthrough; case INITIAL_STATE: irdma_del_init_mem(rf); -- cgit From e49bad785e550fe26ca9416ffc0c85fef84be808 Mon Sep 17 00:00:00 2001 From: Krzysztof Czurylo Date: Tue, 25 Jul 2023 10:55:04 -0500 Subject: RDMA/irdma: Add table based lookup for CQ pointer during an event Add a CQ table based loookup to allow quick search for CQ pointer having CQ ID in case of CQ related asynchrononous 
event. The table is implemented in a similar fashion to QP table. Also add a reference counters for CQ. This is to prevent destroying CQ while an asynchronous event is being processed. The memory resource table size is sized higher with this update, and this table doesn't need to be physically contiguous, so use a vzalloc vs kzalloc to allocate the table. Signed-off-by: Krzysztof Czurylo Signed-off-by: Sindhu Devale Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230725155505.1069-4-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/hw.c | 27 +++++++++++++++++++-------- drivers/infiniband/hw/irdma/main.h | 4 ++++ drivers/infiniband/hw/irdma/utils.c | 25 +++++++++++++++++++++++++ drivers/infiniband/hw/irdma/verbs.c | 7 +++++++ drivers/infiniband/hw/irdma/verbs.h | 2 ++ 5 files changed, 57 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 369eb6b6536d..8519495d23ce 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -218,7 +218,6 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) struct irdma_aeqe_info *info = &aeinfo; int ret; struct irdma_qp *iwqp = NULL; - struct irdma_sc_cq *cq = NULL; struct irdma_cq *iwcq = NULL; struct irdma_sc_qp *qp = NULL; struct irdma_qp_host_ctx_info *ctx_info = NULL; @@ -335,10 +334,18 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) ibdev_err(&iwdev->ibdev, "Processing an iWARP related AE for CQ misc = 0x%04X\n", info->ae_id); - cq = (struct irdma_sc_cq *)(unsigned long) - info->compl_ctx; - iwcq = cq->back_cq; + spin_lock_irqsave(&rf->cqtable_lock, flags); + iwcq = rf->cq_table[info->qp_cq_id]; + if (!iwcq) { + spin_unlock_irqrestore(&rf->cqtable_lock, + flags); + ibdev_dbg(to_ibdev(dev), + "cq_id %d is already freed\n", info->qp_cq_id); + continue; + } + irdma_cq_add_ref(&iwcq->ibcq); + spin_unlock_irqrestore(&rf->cqtable_lock, flags); if (iwcq->ibcq.event_handler) { struct ib_event ibevent; @@ -349,6 +356,7 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) iwcq->ibcq.event_handler(&ibevent, iwcq->ibcq.cq_context); } + irdma_cq_rem_ref(&iwcq->ibcq); break; case IRDMA_AE_RESET_NOT_SENT: case IRDMA_AE_LLP_DOUBT_REACHABILITY: @@ -1555,7 +1563,7 @@ static void irdma_del_init_mem(struct irdma_pci_f *rf) kfree(dev->hmc_info->sd_table.sd_entry); dev->hmc_info->sd_table.sd_entry = NULL; - kfree(rf->mem_rsrc); + vfree(rf->mem_rsrc); rf->mem_rsrc = NULL; dma_free_coherent(rf->hw.device, rf->obj_mem.size, rf->obj_mem.va, rf->obj_mem.pa); @@ -1951,10 +1959,12 @@ static void irdma_set_hw_rsrc(struct irdma_pci_f *rf) rf->allocated_arps = &rf->allocated_mcgs[BITS_TO_LONGS(rf->max_mcg)]; rf->qp_table = (struct irdma_qp **) (&rf->allocated_arps[BITS_TO_LONGS(rf->arp_table_size)]); + rf->cq_table = (struct irdma_cq **)(&rf->qp_table[rf->max_qp]); spin_lock_init(&rf->rsrc_lock); spin_lock_init(&rf->arp_lock); spin_lock_init(&rf->qptable_lock); + spin_lock_init(&rf->cqtable_lock); spin_lock_init(&rf->qh_list_lock); } @@ -1975,6 +1985,7 @@ static u32 irdma_calc_mem_rsrc_size(struct irdma_pci_f *rf) rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_ah); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_mcg); rsrc_size += sizeof(struct irdma_qp **) * rf->max_qp; + rsrc_size += sizeof(struct irdma_cq **) * rf->max_cq; return rsrc_size; } @@ -2008,10 +2019,10 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf) rf->max_mcg = rf->max_qp; rsrc_size = 
irdma_calc_mem_rsrc_size(rf); - rf->mem_rsrc = kzalloc(rsrc_size, GFP_KERNEL); + rf->mem_rsrc = vzalloc(rsrc_size); if (!rf->mem_rsrc) { ret = -ENOMEM; - goto mem_rsrc_kzalloc_fail; + goto mem_rsrc_vzalloc_fail; } rf->arp_table = (struct irdma_arp_entry *)rf->mem_rsrc; @@ -2039,7 +2050,7 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf) return 0; -mem_rsrc_kzalloc_fail: +mem_rsrc_vzalloc_fail: bitmap_free(rf->allocated_ws_nodes); rf->allocated_ws_nodes = NULL; diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index def6dd58dcd4..d3bddd48e864 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -309,7 +309,9 @@ struct irdma_pci_f { spinlock_t arp_lock; /*protect ARP table access*/ spinlock_t rsrc_lock; /* protect HW resource array access */ spinlock_t qptable_lock; /*protect QP table access*/ + spinlock_t cqtable_lock; /*protect CQ table access*/ struct irdma_qp **qp_table; + struct irdma_cq **cq_table; spinlock_t qh_list_lock; /* protect mc_qht_list */ struct mc_table_list mc_qht_list; struct irdma_msix_vector *iw_msixtbl; @@ -500,6 +502,8 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); +void irdma_cq_add_ref(struct ib_cq *ibcq); +void irdma_cq_rem_ref(struct ib_cq *ibcq); void irdma_cq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_cq *cq); void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf); diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 71e1c5d34709..1008e158bba2 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -760,6 +760,31 @@ void irdma_qp_rem_ref(struct ib_qp *ibqp) complete(&iwqp->free_qp); } +void irdma_cq_add_ref(struct ib_cq *ibcq) +{ + struct irdma_cq *iwcq = to_iwcq(ibcq); + + refcount_inc(&iwcq->refcnt); +} + +void irdma_cq_rem_ref(struct ib_cq *ibcq) +{ + struct ib_device *ibdev = ibcq->device; + struct irdma_device *iwdev = to_iwdev(ibdev); + struct irdma_cq *iwcq = to_iwcq(ibcq); + unsigned long flags; + + spin_lock_irqsave(&iwdev->rf->cqtable_lock, flags); + if (!refcount_dec_and_test(&iwcq->refcnt)) { + spin_unlock_irqrestore(&iwdev->rf->cqtable_lock, flags); + return; + } + + iwdev->rf->cq_table[iwcq->cq_num] = NULL; + spin_unlock_irqrestore(&iwdev->rf->cqtable_lock, flags); + complete(&iwcq->free_cq); +} + struct ib_device *to_ibdev(struct irdma_sc_dev *dev) { return &(container_of(dev, struct irdma_pci_f, sc_dev))->iwdev->ibdev; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index a7b82aea4d08..2009819bfad9 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1805,6 +1805,9 @@ static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) irdma_process_resize_list(iwcq, iwdev, NULL); spin_unlock_irqrestore(&iwcq->lock, flags); + irdma_cq_rem_ref(ib_cq); + wait_for_completion(&iwcq->free_cq); + irdma_cq_wq_destroy(iwdev->rf, cq); spin_lock_irqsave(&iwceq->ce_lock, flags); @@ -2014,6 +2017,7 @@ static int irdma_create_cq(struct ib_cq *ibcq, cq = &iwcq->sc_cq; cq->back_cq = iwcq; + refcount_set(&iwcq->refcnt, 1); spin_lock_init(&iwcq->lock); INIT_LIST_HEAD(&iwcq->resize_list); INIT_LIST_HEAD(&iwcq->cmpl_generated); @@ -2165,6 +2169,9 @@ static int irdma_create_cq(struct ib_cq *ibcq, goto cq_destroy; } } + rf->cq_table[cq_num] = iwcq; + 
init_completion(&iwcq->free_cq); + return 0; cq_destroy: irdma_cq_wq_destroy(rf, cq); diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index a536e9fa85eb..9de7217df357 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -122,6 +122,8 @@ struct irdma_cq { u32 cq_mem_size; struct irdma_dma_mem kmem; struct irdma_dma_mem kmem_shadow; + struct completion free_cq; + refcount_t refcnt; spinlock_t lock; /* for poll cq */ struct irdma_pbl *iwpbl; struct irdma_pbl *iwpbl_shadow; -- cgit From 693e1cdebb50d2aa67406411ca6d5be195d62771 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 25 Jul 2023 10:55:05 -0500 Subject: RDMA/irdma: Cleanup and rename irdma_netdev_vlan_ipv6() The return value from irdma_netdev_vlan_ipv6() is not used. Rename the functions and change to a void return. Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230725155505.1069-5-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/cm.c | 23 ++++++++++------------- drivers/infiniband/hw/irdma/main.h | 2 +- drivers/infiniband/hw/irdma/verbs.c | 2 +- 3 files changed, 12 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 70017048d7d1..42d1e9771066 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -1591,21 +1591,20 @@ done: } /** - * irdma_netdev_vlan_ipv6 - Gets the netdev and mac + * irdma_get_vlan_mac_ipv6 - Gets the vlan and mac * @addr: local IPv6 address * @vlan_id: vlan id for the given IPv6 address * @mac: mac address for the given IPv6 address * - * Returns the net_device of the IPv6 address and also sets the - * vlan id and mac for that address. + * Returns the vlan id and mac for an IPv6 address. 
*/ -struct net_device *irdma_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac) +void irdma_get_vlan_mac_ipv6(u32 *addr, u16 *vlan_id, u8 *mac) { struct net_device *ip_dev = NULL; struct in6_addr laddr6; if (!IS_ENABLED(CONFIG_IPV6)) - return NULL; + return; irdma_copy_ip_htonl(laddr6.in6_u.u6_addr32, addr); if (vlan_id) @@ -1624,8 +1623,6 @@ struct net_device *irdma_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac) } } rcu_read_unlock(); - - return ip_dev; } /** @@ -3666,8 +3663,8 @@ int irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->vlan_id = irdma_get_vlan_ipv4(cm_node->loc_addr); } else { cm_node->ipv4 = false; - irdma_netdev_vlan_ipv6(cm_node->loc_addr, &cm_node->vlan_id, - NULL); + irdma_get_vlan_mac_ipv6(cm_node->loc_addr, &cm_node->vlan_id, + NULL); } ibdev_dbg(&iwdev->ibdev, "CM: Accept vlan_id=%d\n", cm_node->vlan_id); @@ -3875,8 +3872,8 @@ int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) raddr6->sin6_addr.in6_u.u6_addr32); cm_info.loc_port = ntohs(laddr6->sin6_port); cm_info.rem_port = ntohs(raddr6->sin6_port); - irdma_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id, - NULL); + irdma_get_vlan_mac_ipv6(cm_info.loc_addr, &cm_info.vlan_id, + NULL); } cm_info.cm_id = cm_id; cm_info.qh_qpid = iwdev->vsi.ilq->qp_id; @@ -4005,8 +4002,8 @@ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog) laddr6->sin6_addr.in6_u.u6_addr32); cm_info.loc_port = ntohs(laddr6->sin6_port); if (ipv6_addr_type(&laddr6->sin6_addr) != IPV6_ADDR_ANY) { - irdma_netdev_vlan_ipv6(cm_info.loc_addr, - &cm_info.vlan_id, NULL); + irdma_get_vlan_mac_ipv6(cm_info.loc_addr, + &cm_info.vlan_id, NULL); } else { cm_info.vlan_id = 0xFFFF; wildcard = true; diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index d3bddd48e864..ad2239aabbc5 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -533,7 +533,7 @@ void irdma_gen_ae(struct irdma_pci_f *rf, struct irdma_sc_qp *qp, void irdma_copy_ip_ntohl(u32 *dst, __be32 *src); void irdma_copy_ip_htonl(__be32 *dst, u32 *src); u16 irdma_get_vlan_ipv4(u32 *addr); -struct net_device *irdma_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac); +void irdma_get_vlan_mac_ipv6(u32 *addr, u16 *vlan_id, u8 *mac); struct ib_mr *irdma_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size, int acc, u64 *iova_start); int irdma_upload_qp_context(struct irdma_qp *iwqp, bool freeze, bool raw); diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 2009819bfad9..a1a42a7cd783 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -3994,7 +3994,7 @@ static int irdma_attach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid) if (!ipv6_addr_v4mapped((struct in6_addr *)ibgid)) { irdma_copy_ip_ntohl(ip_addr, sgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32); - irdma_netdev_vlan_ipv6(ip_addr, &vlan_id, NULL); + irdma_get_vlan_mac_ipv6(ip_addr, &vlan_id, NULL); ipv4 = false; ibdev_dbg(&iwdev->ibdev, "VERBS: qp_id=%d, IP6address=%pI6\n", ibqp->qp_num, -- cgit From 063975feedb14386489619084fbb20792b87d21c Mon Sep 17 00:00:00 2001 From: Chandramohan Akula Date: Wed, 26 Jul 2023 07:51:18 -0700 Subject: bnxt_re: Reorganize the resource stats Move the resource stats to a separate stats structure. 
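Roughly, the loose per-resource atomics move out of the device structure into a nested stats aggregate, so later patches can add further counter groups without touching the device struct again. A standalone sketch of that layout, with illustrative names and C11 atomics standing in for the kernel's atomic_t:

#include <stdatomic.h>
#include <stdio.h>

struct res_cntrs {
	atomic_int qp_count;
	atomic_int cq_count;
	atomic_int mr_count;
};

struct dev_stats {
	struct res_cntrs res;	/* room for more groups later */
};

struct re_dev {
	struct dev_stats stats;	/* instead of loose qp_count/cq_count/... fields */
};

int main(void)
{
	struct re_dev d = { 0 };

	atomic_fetch_add(&d.stats.res.qp_count, 1);
	printf("active_qps=%d\n", atomic_load(&d.stats.res.qp_count));
	return 0;
}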
Signed-off-by: Chandramohan Akula Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1690383081-15033-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 7 ----- drivers/infiniband/hw/bnxt_re/hw_counters.c | 17 ++++++------ drivers/infiniband/hw/bnxt_re/hw_counters.h | 11 ++++++++ drivers/infiniband/hw/bnxt_re/ib_verbs.c | 40 ++++++++++++++--------------- drivers/infiniband/hw/bnxt_re/main.c | 14 +++++----- 5 files changed, 47 insertions(+), 42 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 2175103d570f..03a13258b140 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -175,16 +175,9 @@ struct bnxt_re_dev { struct bnxt_qplib_res qplib_res; struct bnxt_qplib_dpi dpi_privileged; - atomic_t qp_count; struct mutex qp_lock; /* protect qp list */ struct list_head qp_list; - atomic_t cq_count; - atomic_t srq_count; - atomic_t mr_count; - atomic_t mw_count; - atomic_t ah_count; - atomic_t pd_count; /* Max of 2 lossless traffic class supported per port */ u16 cosq[2]; diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 825d512799d9..8310e9a64c13 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -254,21 +254,22 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, u32 port, int index) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); - struct ctx_hw_stats *hw_stats = NULL; + struct bnxt_re_res_cntrs *res_s = &rdev->stats.res; struct bnxt_qplib_roce_stats *err_s = NULL; + struct ctx_hw_stats *hw_stats = NULL; int rc = 0; hw_stats = rdev->qplib_ctx.stats.dma; if (!port || !stats) return -EINVAL; - stats->value[BNXT_RE_ACTIVE_QP] = atomic_read(&rdev->qp_count); - stats->value[BNXT_RE_ACTIVE_SRQ] = atomic_read(&rdev->srq_count); - stats->value[BNXT_RE_ACTIVE_CQ] = atomic_read(&rdev->cq_count); - stats->value[BNXT_RE_ACTIVE_MR] = atomic_read(&rdev->mr_count); - stats->value[BNXT_RE_ACTIVE_MW] = atomic_read(&rdev->mw_count); - stats->value[BNXT_RE_ACTIVE_PD] = atomic_read(&rdev->pd_count); - stats->value[BNXT_RE_ACTIVE_AH] = atomic_read(&rdev->ah_count); + stats->value[BNXT_RE_ACTIVE_QP] = atomic_read(&res_s->qp_count); + stats->value[BNXT_RE_ACTIVE_SRQ] = atomic_read(&res_s->srq_count); + stats->value[BNXT_RE_ACTIVE_CQ] = atomic_read(&res_s->cq_count); + stats->value[BNXT_RE_ACTIVE_MR] = atomic_read(&res_s->mr_count); + stats->value[BNXT_RE_ACTIVE_MW] = atomic_read(&res_s->mw_count); + stats->value[BNXT_RE_ACTIVE_PD] = atomic_read(&res_s->pd_count); + stats->value[BNXT_RE_ACTIVE_AH] = atomic_read(&res_s->ah_count); if (hw_stats) { stats->value[BNXT_RE_RECOVERABLE_ERRORS] = diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h index 7943b2c393e4..4aa6e31f8707 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.h +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h @@ -113,6 +113,16 @@ enum bnxt_re_hw_stats { #define BNXT_RE_NUM_STD_COUNTERS (BNXT_RE_OUT_OF_SEQ_ERR + 1) +struct bnxt_re_res_cntrs { + atomic_t qp_count; + atomic_t cq_count; + atomic_t srq_count; + atomic_t mr_count; + atomic_t mw_count; + atomic_t ah_count; + atomic_t pd_count; +}; + struct bnxt_re_rstat { struct bnxt_qplib_roce_stats errs; struct bnxt_qplib_ext_stat ext_stat; @@ -120,6 +130,7 @@ struct bnxt_re_rstat { struct bnxt_re_stats { 
struct bnxt_re_rstat rstat; + struct bnxt_re_res_cntrs res; }; struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index ec4d163f3f52..b28c869ed890 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -602,7 +602,7 @@ int bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata) if (!bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, &pd->qplib_pd)) - atomic_dec(&rdev->pd_count); + atomic_dec(&rdev->stats.res.pd_count); } return 0; } @@ -665,7 +665,7 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) if (bnxt_re_create_fence_mr(pd)) ibdev_warn(&rdev->ibdev, "Failed to create Fence-MR\n"); - atomic_inc(&rdev->pd_count); + atomic_inc(&rdev->stats.res.pd_count); return 0; dbfail: @@ -691,7 +691,7 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) else goto fail; } - atomic_dec(&rdev->ah_count); + atomic_dec(&rdev->stats.res.ah_count); fail: return rc; } @@ -777,7 +777,7 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr, wmb(); /* make sure cache is updated. */ spin_unlock_irqrestore(&uctx->sh_lock, flag); } - atomic_inc(&rdev->ah_count); + atomic_inc(&rdev->stats.res.ah_count); return 0; } @@ -838,7 +838,7 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) bnxt_qplib_destroy_ah(&rdev->qplib_res, &gsi_sah->qplib_ah, true); - atomic_dec(&rdev->ah_count); + atomic_dec(&rdev->stats.res.ah_count); bnxt_qplib_clean_qp(&qp->qplib_qp); ibdev_dbg(&rdev->ibdev, "Destroy the shadow QP\n"); @@ -853,7 +853,7 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) mutex_lock(&rdev->qp_lock); list_del(&gsi_sqp->list); mutex_unlock(&rdev->qp_lock); - atomic_dec(&rdev->qp_count); + atomic_dec(&rdev->stats.res.qp_count); kfree(rdev->gsi_ctx.sqp_tbl); kfree(gsi_sah); @@ -900,7 +900,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) mutex_lock(&rdev->qp_lock); list_del(&qp->list); mutex_unlock(&rdev->qp_lock); - atomic_dec(&rdev->qp_count); + atomic_dec(&rdev->stats.res.qp_count); ib_umem_release(qp->rumem); ib_umem_release(qp->sumem); @@ -1085,7 +1085,7 @@ static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah "Failed to allocate HW AH for Shadow QP"); goto fail; } - atomic_inc(&rdev->ah_count); + atomic_inc(&rdev->stats.res.ah_count); return ah; @@ -1153,7 +1153,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp INIT_LIST_HEAD(&qp->list); mutex_lock(&rdev->qp_lock); list_add_tail(&qp->list, &rdev->qp_list); - atomic_inc(&rdev->qp_count); + atomic_inc(&rdev->stats.res.qp_count); mutex_unlock(&rdev->qp_lock); return qp; fail: @@ -1535,7 +1535,7 @@ int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr, mutex_lock(&rdev->qp_lock); list_add_tail(&qp->list, &rdev->qp_list); mutex_unlock(&rdev->qp_lock); - atomic_inc(&rdev->qp_count); + atomic_inc(&rdev->stats.res.qp_count); return 0; qp_destroy: @@ -1638,7 +1638,7 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) nq = qplib_srq->cq->nq; bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq); ib_umem_release(srq->umem); - atomic_dec(&rdev->srq_count); + atomic_dec(&rdev->stats.res.srq_count); if (nq) nq->budget--; return 0; @@ -1750,7 +1750,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, } if (nq) nq->budget++; - atomic_inc(&rdev->srq_count); + atomic_inc(&rdev->stats.res.srq_count); spin_lock_init(&srq->lock); return 0; @@ -2876,7 
+2876,7 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq); ib_umem_release(cq->umem); - atomic_dec(&rdev->cq_count); + atomic_dec(&rdev->stats.res.cq_count); nq->budget--; kfree(cq->cql); return 0; @@ -2960,7 +2960,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->cq_period = cq->qplib_cq.period; nq->budget++; - atomic_inc(&rdev->cq_count); + atomic_inc(&rdev->stats.res.cq_count); spin_lock_init(&cq->cq_lock); if (udata) { @@ -3785,7 +3785,7 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags) if (mr_access_flags & (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_ATOMIC)) mr->ib_mr.rkey = mr->ib_mr.lkey; - atomic_inc(&rdev->mr_count); + atomic_inc(&rdev->stats.res.mr_count); return &mr->ib_mr; @@ -3818,7 +3818,7 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) ib_umem_release(mr->ib_umem); kfree(mr); - atomic_dec(&rdev->mr_count); + atomic_dec(&rdev->stats.res.mr_count); return rc; } @@ -3886,7 +3886,7 @@ struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type, goto fail_mr; } - atomic_inc(&rdev->mr_count); + atomic_inc(&rdev->stats.res.mr_count); return &mr->ib_mr; fail_mr: @@ -3922,7 +3922,7 @@ struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, } mw->ib_mw.rkey = mw->qplib_mw.rkey; - atomic_inc(&rdev->mw_count); + atomic_inc(&rdev->stats.res.mw_count); return &mw->ib_mw; fail: @@ -3943,7 +3943,7 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw) } kfree(mw); - atomic_dec(&rdev->mw_count); + atomic_dec(&rdev->stats.res.mw_count); return rc; } @@ -4010,7 +4010,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, mr->ib_mr.lkey = mr->qplib_mr.lkey; mr->ib_mr.rkey = mr->qplib_mr.lkey; - atomic_inc(&rdev->mr_count); + atomic_inc(&rdev->stats.res.mr_count); return &mr->ib_mr; free_umem: diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 6469811003f6..91efa0400c59 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -918,13 +918,13 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct bnxt_aux_priv *aux_priv, rdev->id = rdev->en_dev->pdev->devfn; INIT_LIST_HEAD(&rdev->qp_list); mutex_init(&rdev->qp_lock); - atomic_set(&rdev->qp_count, 0); - atomic_set(&rdev->cq_count, 0); - atomic_set(&rdev->srq_count, 0); - atomic_set(&rdev->mr_count, 0); - atomic_set(&rdev->mw_count, 0); - atomic_set(&rdev->ah_count, 0); - atomic_set(&rdev->pd_count, 0); + atomic_set(&rdev->stats.res.qp_count, 0); + atomic_set(&rdev->stats.res.cq_count, 0); + atomic_set(&rdev->stats.res.srq_count, 0); + atomic_set(&rdev->stats.res.mr_count, 0); + atomic_set(&rdev->stats.res.mw_count, 0); + atomic_set(&rdev->stats.res.ah_count, 0); + atomic_set(&rdev->stats.res.pd_count, 0); rdev->cosq[0] = 0xFFFF; rdev->cosq[1] = 0xFFFF; -- cgit From cb95709e0dca7a2dee1c168a2100b5fa21ca6205 Mon Sep 17 00:00:00 2001 From: Chandramohan Akula Date: Wed, 26 Jul 2023 07:51:19 -0700 Subject: bnxt_re: Update the hw counters for resource stats Report the additional resource counters which enables better debugging. Includes active RC/UD QPs, Watermark of the resources and a count that indicates the resize cq operations after driver load. 
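The watermark bookkeeping below follows one pattern throughout the create paths: read the post-increment active count and raise the high-water mark whenever it exceeds the previous peak; destroy paths only decrement the active count. A standalone sketch (illustrative, not the driver code):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct res_stats {
	atomic_uint qp_count;
	uint64_t qp_watermark;
};

static void qp_created(struct res_stats *s)
{
	unsigned int active = atomic_fetch_add(&s->qp_count, 1) + 1;

	if (active > s->qp_watermark)
		s->qp_watermark = active;	/* peak since load, never reset */
}

static void qp_destroyed(struct res_stats *s)
{
	atomic_fetch_sub(&s->qp_count, 1);	/* watermark is left untouched */
}

int main(void)
{
	struct res_stats s = { 0 };

	for (int i = 0; i < 5; i++)
		qp_created(&s);
	qp_destroyed(&s);
	printf("active=%u watermark=%llu\n", atomic_load(&s.qp_count),
	       (unsigned long long)s.qp_watermark);	/* active=4 watermark=5 */
	return 0;
}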
Signed-off-by: Chandramohan Akula Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1690383081-15033-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/hw_counters.c | 24 +++++++++++++ drivers/infiniband/hw/bnxt_re/hw_counters.h | 24 +++++++++++++ drivers/infiniband/hw/bnxt_re/ib_verbs.c | 55 ++++++++++++++++++++++++----- 3 files changed, 94 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 8310e9a64c13..8598af579ab0 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -61,10 +61,22 @@ static const struct rdma_stat_desc bnxt_re_stat_descs[] = { [BNXT_RE_ACTIVE_PD].name = "active_pds", [BNXT_RE_ACTIVE_AH].name = "active_ahs", [BNXT_RE_ACTIVE_QP].name = "active_qps", + [BNXT_RE_ACTIVE_RC_QP].name = "active_rc_qps", + [BNXT_RE_ACTIVE_UD_QP].name = "active_ud_qps", [BNXT_RE_ACTIVE_SRQ].name = "active_srqs", [BNXT_RE_ACTIVE_CQ].name = "active_cqs", [BNXT_RE_ACTIVE_MR].name = "active_mrs", [BNXT_RE_ACTIVE_MW].name = "active_mws", + [BNXT_RE_WATERMARK_PD].name = "watermark_pds", + [BNXT_RE_WATERMARK_AH].name = "watermark_ahs", + [BNXT_RE_WATERMARK_QP].name = "watermark_qps", + [BNXT_RE_WATERMARK_RC_QP].name = "watermark_rc_qps", + [BNXT_RE_WATERMARK_UD_QP].name = "watermark_ud_qps", + [BNXT_RE_WATERMARK_SRQ].name = "watermark_srqs", + [BNXT_RE_WATERMARK_CQ].name = "watermark_cqs", + [BNXT_RE_WATERMARK_MR].name = "watermark_mrs", + [BNXT_RE_WATERMARK_MW].name = "watermark_mws", + [BNXT_RE_RESIZE_CQ_CNT].name = "resize_cq_cnt", [BNXT_RE_RX_PKTS].name = "rx_pkts", [BNXT_RE_RX_BYTES].name = "rx_bytes", [BNXT_RE_TX_PKTS].name = "tx_pkts", @@ -264,12 +276,24 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, return -EINVAL; stats->value[BNXT_RE_ACTIVE_QP] = atomic_read(&res_s->qp_count); + stats->value[BNXT_RE_ACTIVE_RC_QP] = atomic_read(&res_s->rc_qp_count); + stats->value[BNXT_RE_ACTIVE_UD_QP] = atomic_read(&res_s->ud_qp_count); stats->value[BNXT_RE_ACTIVE_SRQ] = atomic_read(&res_s->srq_count); stats->value[BNXT_RE_ACTIVE_CQ] = atomic_read(&res_s->cq_count); stats->value[BNXT_RE_ACTIVE_MR] = atomic_read(&res_s->mr_count); stats->value[BNXT_RE_ACTIVE_MW] = atomic_read(&res_s->mw_count); stats->value[BNXT_RE_ACTIVE_PD] = atomic_read(&res_s->pd_count); stats->value[BNXT_RE_ACTIVE_AH] = atomic_read(&res_s->ah_count); + stats->value[BNXT_RE_WATERMARK_QP] = res_s->qp_watermark; + stats->value[BNXT_RE_WATERMARK_RC_QP] = res_s->rc_qp_watermark; + stats->value[BNXT_RE_WATERMARK_UD_QP] = res_s->ud_qp_watermark; + stats->value[BNXT_RE_WATERMARK_SRQ] = res_s->srq_watermark; + stats->value[BNXT_RE_WATERMARK_CQ] = res_s->cq_watermark; + stats->value[BNXT_RE_WATERMARK_MR] = res_s->mr_watermark; + stats->value[BNXT_RE_WATERMARK_MW] = res_s->mw_watermark; + stats->value[BNXT_RE_WATERMARK_PD] = res_s->pd_watermark; + stats->value[BNXT_RE_WATERMARK_AH] = res_s->ah_watermark; + stats->value[BNXT_RE_RESIZE_CQ_CNT] = atomic_read(&res_s->resize_count); if (hw_stats) { stats->value[BNXT_RE_RECOVERABLE_ERRORS] = diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h index 4aa6e31f8707..7231a2b63620 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.h +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h @@ -44,10 +44,22 @@ enum bnxt_re_hw_stats { BNXT_RE_ACTIVE_PD, BNXT_RE_ACTIVE_AH, BNXT_RE_ACTIVE_QP, + BNXT_RE_ACTIVE_RC_QP, + 
BNXT_RE_ACTIVE_UD_QP, BNXT_RE_ACTIVE_SRQ, BNXT_RE_ACTIVE_CQ, BNXT_RE_ACTIVE_MR, BNXT_RE_ACTIVE_MW, + BNXT_RE_WATERMARK_PD, + BNXT_RE_WATERMARK_AH, + BNXT_RE_WATERMARK_QP, + BNXT_RE_WATERMARK_RC_QP, + BNXT_RE_WATERMARK_UD_QP, + BNXT_RE_WATERMARK_SRQ, + BNXT_RE_WATERMARK_CQ, + BNXT_RE_WATERMARK_MR, + BNXT_RE_WATERMARK_MW, + BNXT_RE_RESIZE_CQ_CNT, BNXT_RE_RX_PKTS, BNXT_RE_RX_BYTES, BNXT_RE_TX_PKTS, @@ -115,12 +127,24 @@ enum bnxt_re_hw_stats { struct bnxt_re_res_cntrs { atomic_t qp_count; + atomic_t rc_qp_count; + atomic_t ud_qp_count; atomic_t cq_count; atomic_t srq_count; atomic_t mr_count; atomic_t mw_count; atomic_t ah_count; atomic_t pd_count; + atomic_t resize_count; + u64 qp_watermark; + u64 rc_qp_watermark; + u64 ud_qp_watermark; + u64 cq_watermark; + u64 srq_watermark; + u64 mr_watermark; + u64 mw_watermark; + u64 ah_watermark; + u64 pd_watermark; }; struct bnxt_re_rstat { diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index b28c869ed890..2b2505ad103d 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -615,6 +615,7 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) udata, struct bnxt_re_ucontext, ib_uctx); struct bnxt_re_pd *pd = container_of(ibpd, struct bnxt_re_pd, ib_pd); struct bnxt_re_user_mmap_entry *entry = NULL; + u32 active_pds; int rc = 0; pd->rdev = rdev; @@ -665,7 +666,9 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) if (bnxt_re_create_fence_mr(pd)) ibdev_warn(&rdev->ibdev, "Failed to create Fence-MR\n"); - atomic_inc(&rdev->stats.res.pd_count); + active_pds = atomic_inc_return(&rdev->stats.res.pd_count); + if (active_pds > rdev->stats.res.pd_watermark) + rdev->stats.res.pd_watermark = active_pds; return 0; dbfail: @@ -725,6 +728,7 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr, const struct ib_gid_attr *sgid_attr; struct bnxt_re_gid_ctx *ctx; struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); + u32 active_ahs; u8 nw_type; int rc; @@ -777,7 +781,9 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr, wmb(); /* make sure cache is updated. 
*/ spin_unlock_irqrestore(&uctx->sh_lock, flag); } - atomic_inc(&rdev->stats.res.ah_count); + active_ahs = atomic_inc_return(&rdev->stats.res.ah_count); + if (active_ahs > rdev->stats.res.ah_watermark) + rdev->stats.res.ah_watermark = active_ahs; return 0; } @@ -1487,6 +1493,7 @@ int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr, struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); + u32 active_qps; int rc; rc = bnxt_re_test_qp_limits(rdev, qp_init_attr, dev_attr); @@ -1535,7 +1542,18 @@ int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr, mutex_lock(&rdev->qp_lock); list_add_tail(&qp->list, &rdev->qp_list); mutex_unlock(&rdev->qp_lock); - atomic_inc(&rdev->stats.res.qp_count); + active_qps = atomic_inc_return(&rdev->stats.res.qp_count); + if (active_qps > rdev->stats.res.qp_watermark) + rdev->stats.res.qp_watermark = active_qps; + if (qp_init_attr->qp_type == IB_QPT_RC) { + active_qps = atomic_inc_return(&rdev->stats.res.rc_qp_count); + if (active_qps > rdev->stats.res.rc_qp_watermark) + rdev->stats.res.rc_qp_watermark = active_qps; + } else if (qp_init_attr->qp_type == IB_QPT_UD) { + active_qps = atomic_inc_return(&rdev->stats.res.ud_qp_count); + if (active_qps > rdev->stats.res.ud_qp_watermark) + rdev->stats.res.ud_qp_watermark = active_qps; + } return 0; qp_destroy: @@ -1686,6 +1704,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, struct bnxt_re_srq *srq; struct bnxt_re_pd *pd; struct ib_pd *ib_pd; + u32 active_srqs; int rc, entries; ib_pd = ib_srq->pd; @@ -1750,7 +1769,9 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, } if (nq) nq->budget++; - atomic_inc(&rdev->stats.res.srq_count); + active_srqs = atomic_inc_return(&rdev->stats.res.srq_count); + if (active_srqs > rdev->stats.res.srq_watermark) + rdev->stats.res.srq_watermark = active_srqs; spin_lock_init(&srq->lock); return 0; @@ -2892,6 +2913,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, int cqe = attr->cqe; struct bnxt_qplib_nq *nq = NULL; unsigned int nq_alloc_cnt; + u32 active_cqs; if (attr->flags) return -EOPNOTSUPP; @@ -2960,7 +2982,9 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->cq_period = cq->qplib_cq.period; nq->budget++; - atomic_inc(&rdev->stats.res.cq_count); + active_cqs = atomic_inc_return(&rdev->stats.res.cq_count); + if (active_cqs > rdev->stats.res.cq_watermark) + rdev->stats.res.cq_watermark = active_cqs; spin_lock_init(&cq->cq_lock); if (udata) { @@ -3073,6 +3097,7 @@ int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) } cq->ib_cq.cqe = cq->resize_cqe; + atomic_inc(&rdev->stats.res.resize_count); return 0; @@ -3758,6 +3783,7 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags) struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_mr *mr; + u32 active_mrs; int rc; mr = kzalloc(sizeof(*mr), GFP_KERNEL); @@ -3785,7 +3811,9 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags) if (mr_access_flags & (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_ATOMIC)) mr->ib_mr.rkey = mr->ib_mr.lkey; - atomic_inc(&rdev->stats.res.mr_count); + active_mrs = atomic_inc_return(&rdev->stats.res.mr_count); + if (active_mrs > rdev->stats.res.mr_watermark) + rdev->stats.res.mr_watermark = active_mrs; return &mr->ib_mr; @@ -3848,6 +3876,7 
@@ struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type, struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_mr *mr = NULL; + u32 active_mrs; int rc; if (type != IB_MR_TYPE_MEM_REG) { @@ -3886,7 +3915,9 @@ struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type, goto fail_mr; } - atomic_inc(&rdev->stats.res.mr_count); + active_mrs = atomic_inc_return(&rdev->stats.res.mr_count); + if (active_mrs > rdev->stats.res.mr_watermark) + rdev->stats.res.mr_watermark = active_mrs; return &mr->ib_mr; fail_mr: @@ -3904,6 +3935,7 @@ struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_mw *mw; + u32 active_mws; int rc; mw = kzalloc(sizeof(*mw), GFP_KERNEL); @@ -3922,7 +3954,9 @@ struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, } mw->ib_mw.rkey = mw->qplib_mw.rkey; - atomic_inc(&rdev->stats.res.mw_count); + active_mws = atomic_inc_return(&rdev->stats.res.mw_count); + if (active_mws > rdev->stats.res.mw_watermark) + rdev->stats.res.mw_watermark = active_mws; return &mw->ib_mw; fail: @@ -3958,6 +3992,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, struct ib_umem *umem; unsigned long page_size; int umem_pgs, rc; + u32 active_mrs; if (length > BNXT_RE_MAX_MR_SIZE) { ibdev_err(&rdev->ibdev, "MR Size: %lld > Max supported:%lld\n", @@ -4010,7 +4045,9 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, mr->ib_mr.lkey = mr->qplib_mr.lkey; mr->ib_mr.rkey = mr->qplib_mr.lkey; - atomic_inc(&rdev->stats.res.mr_count); + active_mrs = atomic_inc_return(&rdev->stats.res.mr_count); + if (active_mrs > rdev->stats.res.mr_watermark) + rdev->stats.res.mr_watermark = active_mrs; return &mr->ib_mr; free_umem: -- cgit From 4405baf85a83eda03065cf5ddd5de41d7bd1881b Mon Sep 17 00:00:00 2001 From: Chandramohan Akula Date: Wed, 26 Jul 2023 07:51:20 -0700 Subject: bnxt_re: Expose the missing hw counters Add code to expose some of the HW counters related to tx/rx data and Congestion control. 
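The copy routines below convert each little-endian field of the device's stats block before reporting it. A standalone sketch of that conversion step, where le64toh/htole64 from <endian.h> on Linux stand in for the kernel's le64_to_cpu/cpu_to_le64 and the field names are illustrative:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

struct hw_stats_le {			/* as written by the device, little-endian */
	uint64_t tx_cnp_pkts;
	uint64_t rx_ecn_marked_pkts;
};

int main(void)
{
	struct hw_stats_le raw = {
		.tx_cnp_pkts        = htole64(42),
		.rx_ecn_marked_pkts = htole64(7),
	};

	printf("tx_cnp=%llu rx_ecn=%llu\n",
	       (unsigned long long)le64toh(raw.tx_cnp_pkts),
	       (unsigned long long)le64toh(raw.rx_ecn_marked_pkts));
	return 0;
}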
Signed-off-by: Chandramohan Akula Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1690383081-15033-4-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/hw_counters.c | 25 +++++++++++++++++++++++-- drivers/infiniband/hw/bnxt_re/hw_counters.h | 9 +++++++++ drivers/infiniband/hw/bnxt_re/qplib_sp.c | 7 +++++++ 3 files changed, 39 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 8598af579ab0..e50a1cb1984b 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -82,6 +82,8 @@ static const struct rdma_stat_desc bnxt_re_stat_descs[] = { [BNXT_RE_TX_PKTS].name = "tx_pkts", [BNXT_RE_TX_BYTES].name = "tx_bytes", [BNXT_RE_RECOVERABLE_ERRORS].name = "recoverable_errors", + [BNXT_RE_TX_ERRORS].name = "tx_roce_errors", + [BNXT_RE_TX_DISCARDS].name = "tx_roce_discards", [BNXT_RE_RX_ERRORS].name = "rx_roce_errors", [BNXT_RE_RX_DISCARDS].name = "rx_roce_discards", [BNXT_RE_TO_RETRANSMITS].name = "to_retransmits", @@ -129,14 +131,21 @@ static const struct rdma_stat_desc bnxt_re_stat_descs[] = { [BNXT_RE_TX_READ_RES].name = "tx_read_resp", [BNXT_RE_TX_WRITE_REQ].name = "tx_write_req", [BNXT_RE_TX_SEND_REQ].name = "tx_send_req", + [BNXT_RE_TX_ROCE_PKTS].name = "tx_roce_only_pkts", + [BNXT_RE_TX_ROCE_BYTES].name = "tx_roce_only_bytes", [BNXT_RE_RX_ATOMIC_REQ].name = "rx_atomic_req", [BNXT_RE_RX_READ_REQ].name = "rx_read_req", [BNXT_RE_RX_READ_RESP].name = "rx_read_resp", [BNXT_RE_RX_WRITE_REQ].name = "rx_write_req", [BNXT_RE_RX_SEND_REQ].name = "rx_send_req", + [BNXT_RE_RX_ROCE_PKTS].name = "rx_roce_only_pkts", + [BNXT_RE_RX_ROCE_BYTES].name = "rx_roce_only_bytes", [BNXT_RE_RX_ROCE_GOOD_PKTS].name = "rx_roce_good_pkts", [BNXT_RE_RX_ROCE_GOOD_BYTES].name = "rx_roce_good_bytes", - [BNXT_RE_OOB].name = "rx_out_of_buffer" + [BNXT_RE_OOB].name = "rx_out_of_buffer", + [BNXT_RE_TX_CNP].name = "tx_cnp_pkts", + [BNXT_RE_RX_CNP].name = "rx_cnp_pkts", + [BNXT_RE_RX_ECN].name = "rx_ecn_marked_pkts", }; static void bnxt_re_copy_ext_stats(struct bnxt_re_dev *rdev, @@ -148,14 +157,22 @@ static void bnxt_re_copy_ext_stats(struct bnxt_re_dev *rdev, stats->value[BNXT_RE_TX_READ_RES] = s->tx_read_res; stats->value[BNXT_RE_TX_WRITE_REQ] = s->tx_write_req; stats->value[BNXT_RE_TX_SEND_REQ] = s->tx_send_req; + stats->value[BNXT_RE_TX_ROCE_PKTS] = s->tx_roce_pkts; + stats->value[BNXT_RE_TX_ROCE_BYTES] = s->tx_roce_bytes; stats->value[BNXT_RE_RX_ATOMIC_REQ] = s->rx_atomic_req; stats->value[BNXT_RE_RX_READ_REQ] = s->rx_read_req; stats->value[BNXT_RE_RX_READ_RESP] = s->rx_read_res; stats->value[BNXT_RE_RX_WRITE_REQ] = s->rx_write_req; stats->value[BNXT_RE_RX_SEND_REQ] = s->rx_send_req; + stats->value[BNXT_RE_RX_ROCE_PKTS] = s->rx_roce_pkts; + stats->value[BNXT_RE_RX_ROCE_BYTES] = s->rx_roce_bytes; stats->value[BNXT_RE_RX_ROCE_GOOD_PKTS] = s->rx_roce_good_pkts; stats->value[BNXT_RE_RX_ROCE_GOOD_BYTES] = s->rx_roce_good_bytes; stats->value[BNXT_RE_OOB] = s->rx_out_of_buffer; + stats->value[BNXT_RE_TX_CNP] = s->tx_cnp; + stats->value[BNXT_RE_RX_CNP] = s->rx_cnp; + stats->value[BNXT_RE_RX_ECN] = s->rx_ecn_marked; + stats->value[BNXT_RE_OUT_OF_SEQ_ERR] = s->rx_out_of_sequence; } static int bnxt_re_get_ext_stat(struct bnxt_re_dev *rdev, @@ -298,6 +315,10 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, if (hw_stats) { stats->value[BNXT_RE_RECOVERABLE_ERRORS] = 
le64_to_cpu(hw_stats->tx_bcast_pkts); + stats->value[BNXT_RE_TX_DISCARDS] = + le64_to_cpu(hw_stats->tx_discard_pkts); + stats->value[BNXT_RE_TX_ERRORS] = + le64_to_cpu(hw_stats->tx_error_pkts); stats->value[BNXT_RE_RX_ERRORS] = le64_to_cpu(hw_stats->rx_error_pkts); stats->value[BNXT_RE_RX_DISCARDS] = @@ -319,6 +340,7 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, &rdev->flags); goto done; } + bnxt_re_copy_err_stats(rdev, stats, err_s); if (_is_ext_stats_supported(rdev->dev_attr.dev_cap_flags) && !rdev->is_virtfn) { rc = bnxt_re_get_ext_stat(rdev, stats); @@ -328,7 +350,6 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, goto done; } } - bnxt_re_copy_err_stats(rdev, stats, err_s); } done: diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h index 7231a2b63620..f3c4e35a23d8 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.h +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h @@ -65,6 +65,8 @@ enum bnxt_re_hw_stats { BNXT_RE_TX_PKTS, BNXT_RE_TX_BYTES, BNXT_RE_RECOVERABLE_ERRORS, + BNXT_RE_TX_ERRORS, + BNXT_RE_TX_DISCARDS, BNXT_RE_RX_ERRORS, BNXT_RE_RX_DISCARDS, BNXT_RE_TO_RETRANSMITS, @@ -112,14 +114,21 @@ enum bnxt_re_hw_stats { BNXT_RE_TX_READ_RES, BNXT_RE_TX_WRITE_REQ, BNXT_RE_TX_SEND_REQ, + BNXT_RE_TX_ROCE_PKTS, + BNXT_RE_TX_ROCE_BYTES, BNXT_RE_RX_ATOMIC_REQ, BNXT_RE_RX_READ_REQ, BNXT_RE_RX_READ_RESP, BNXT_RE_RX_WRITE_REQ, BNXT_RE_RX_SEND_REQ, + BNXT_RE_RX_ROCE_PKTS, + BNXT_RE_RX_ROCE_BYTES, BNXT_RE_RX_ROCE_GOOD_PKTS, BNXT_RE_RX_ROCE_GOOD_BYTES, BNXT_RE_OOB, + BNXT_RE_TX_CNP, + BNXT_RE_RX_CNP, + BNXT_RE_RX_ECN, BNXT_RE_NUM_EXT_COUNTERS }; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index ab45f9d4bb02..7e57faab4f78 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -832,15 +832,22 @@ int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid, estat->tx_read_res = le64_to_cpu(sb->tx_read_res_pkts); estat->tx_write_req = le64_to_cpu(sb->tx_write_req_pkts); estat->tx_send_req = le64_to_cpu(sb->tx_send_req_pkts); + estat->tx_roce_pkts = le64_to_cpu(sb->tx_roce_pkts); + estat->tx_roce_bytes = le64_to_cpu(sb->tx_roce_bytes); estat->rx_atomic_req = le64_to_cpu(sb->rx_atomic_req_pkts); estat->rx_read_req = le64_to_cpu(sb->rx_read_req_pkts); estat->rx_read_res = le64_to_cpu(sb->rx_read_res_pkts); estat->rx_write_req = le64_to_cpu(sb->rx_write_req_pkts); estat->rx_send_req = le64_to_cpu(sb->rx_send_req_pkts); + estat->rx_roce_pkts = le64_to_cpu(sb->rx_roce_pkts); + estat->rx_roce_bytes = le64_to_cpu(sb->rx_roce_bytes); estat->rx_roce_good_pkts = le64_to_cpu(sb->rx_roce_good_pkts); estat->rx_roce_good_bytes = le64_to_cpu(sb->rx_roce_good_bytes); estat->rx_out_of_buffer = le64_to_cpu(sb->rx_out_of_buffer_pkts); estat->rx_out_of_sequence = le64_to_cpu(sb->rx_out_of_sequence_pkts); + estat->tx_cnp = le64_to_cpu(sb->tx_cnp_pkts); + estat->rx_cnp = le64_to_cpu(sb->rx_cnp_pkts); + estat->rx_ecn_marked = le64_to_cpu(sb->rx_ecn_marked_pkts); bail: bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); -- cgit From 8b6573ff3420a2da1deb469a480dbc454745f784 Mon Sep 17 00:00:00 2001 From: Chandramohan Akula Date: Wed, 26 Jul 2023 07:51:21 -0700 Subject: bnxt_re: Update the debug counters for doorbell pacing Add debug counters to track the Doorbell pacing events and report the doorbell pacing debug stats. 
Signed-off-by: Chandramohan Akula Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1690383081-15033-5-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/hw_counters.c | 18 ++++++++++++++++++ drivers/infiniband/hw/bnxt_re/hw_counters.h | 11 +++++++++++ drivers/infiniband/hw/bnxt_re/main.c | 3 +++ 3 files changed, 32 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index e50a1cb1984b..93572405d6fa 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -146,6 +146,10 @@ static const struct rdma_stat_desc bnxt_re_stat_descs[] = { [BNXT_RE_TX_CNP].name = "tx_cnp_pkts", [BNXT_RE_RX_CNP].name = "rx_cnp_pkts", [BNXT_RE_RX_ECN].name = "rx_ecn_marked_pkts", + [BNXT_RE_PACING_RESCHED].name = "pacing_reschedule", + [BNXT_RE_PACING_CMPL].name = "pacing_complete", + [BNXT_RE_PACING_ALERT].name = "pacing_alerts", + [BNXT_RE_DB_FIFO_REG].name = "db_fifo_register", }; static void bnxt_re_copy_ext_stats(struct bnxt_re_dev *rdev, @@ -278,6 +282,18 @@ static void bnxt_re_copy_err_stats(struct bnxt_re_dev *rdev, err_s->res_oos_drop_count; } +static void bnxt_re_copy_db_pacing_stats(struct bnxt_re_dev *rdev, + struct rdma_hw_stats *stats) +{ + struct bnxt_re_db_pacing_stats *pacing_s = &rdev->stats.pacing; + + stats->value[BNXT_RE_PACING_RESCHED] = pacing_s->resched; + stats->value[BNXT_RE_PACING_CMPL] = pacing_s->complete; + stats->value[BNXT_RE_PACING_ALERT] = pacing_s->alerts; + stats->value[BNXT_RE_DB_FIFO_REG] = + readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off); +} + int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u32 port, int index) @@ -350,6 +366,8 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, goto done; } } + if (rdev->pacing.dbr_pacing) + bnxt_re_copy_db_pacing_stats(rdev, stats); } done: diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h index f3c4e35a23d8..e541b6f8ca9f 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.h +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h @@ -129,11 +129,21 @@ enum bnxt_re_hw_stats { BNXT_RE_TX_CNP, BNXT_RE_RX_CNP, BNXT_RE_RX_ECN, + BNXT_RE_PACING_RESCHED, + BNXT_RE_PACING_CMPL, + BNXT_RE_PACING_ALERT, + BNXT_RE_DB_FIFO_REG, BNXT_RE_NUM_EXT_COUNTERS }; #define BNXT_RE_NUM_STD_COUNTERS (BNXT_RE_OUT_OF_SEQ_ERR + 1) +struct bnxt_re_db_pacing_stats { + u64 resched; + u64 complete; + u64 alerts; +}; + struct bnxt_re_res_cntrs { atomic_t qp_count; atomic_t rc_qp_count; @@ -164,6 +174,7 @@ struct bnxt_re_rstat { struct bnxt_re_stats { struct bnxt_re_rstat rstat; struct bnxt_re_res_cntrs res; + struct bnxt_re_db_pacing_stats pacing; }; struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 91efa0400c59..87960ac42084 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -533,6 +533,7 @@ static void bnxt_re_db_fifo_check(struct work_struct *work) pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE; schedule_delayed_work(&rdev->dbq_pacing_work, msecs_to_jiffies(rdev->pacing.dbq_pacing_time)); + rdev->stats.pacing.alerts++; mutex_unlock(&rdev->pacing.dbq_lock); } @@ -563,12 +564,14 @@ static void bnxt_re_pacing_timer_exp(struct work_struct *work) pacing_data->do_pacing = max_t(u32, 
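Concretely, once the lane count is known, the width follows directly from the number of lanes and the IB speed from the per-lane rate (link speed divided by lanes); for example 100 Gb/s over 4 lanes maps to 4X at 25 Gb/s per lane, i.e. EDR. A standalone sketch of that mapping, mirroring the switch in the patch below:

#include <stdio.h>

static const char *per_lane_to_ib_speed(unsigned int mbps_per_lane)
{
	switch (mbps_per_lane) {
	case 2500:   return "SDR";
	case 5000:   return "DDR";
	case 10000:  return "FDR10";
	case 14000:  return "FDR";
	case 25000:  return "EDR";
	case 50000:  return "HDR";
	case 100000: return "NDR";
	default:     return "SDR";	/* conservative fallback */
	}
}

int main(void)
{
	unsigned int netdev_speed = 100000;	/* Mb/s, from ethtool link settings */
	unsigned int lanes = 4;

	printf("%uX %s\n", lanes, per_lane_to_ib_speed(netdev_speed / lanes));
	return 0;
}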
rdev->pacing.dbr_def_do_pacing, pacing_data->do_pacing); if (pacing_data->do_pacing <= rdev->pacing.dbr_def_do_pacing) { bnxt_re_set_default_pacing_data(rdev); + rdev->stats.pacing.complete++; goto dbq_unlock; } restart_timer: schedule_delayed_work(&rdev->dbq_pacing_work, msecs_to_jiffies(rdev->pacing.dbq_pacing_time)); + rdev->stats.pacing.resched++; dbq_unlock: rdev->pacing.do_pacing_save = pacing_data->do_pacing; mutex_unlock(&rdev->pacing.dbq_lock); -- cgit From cb06b6b3f6cbc56c534587db2aac3e0958a4a314 Mon Sep 17 00:00:00 2001 From: Haoyue Xu Date: Fri, 21 Jul 2023 17:20:52 +0800 Subject: RDMA/core: Get IB width and speed from netdev Previously, there was no way to query the number of lanes for a network card, so the same netdev_speed would result in a fixed pair of width and speed. As network card specifications become more diverse, such fixed mode is no longer suitable, so a method is needed to obtain the correct width and speed based on the number of lanes. This patch retrieves netdev lanes and speed from net_device and translates them to IB width and speed. Signed-off-by: Haoyue Xu Signed-off-by: Luoyouming Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20230721092052.2090449-1-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/verbs.c | 100 +++++++++++++++++++++++++++++++--------- 1 file changed, 79 insertions(+), 21 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index b99b3cc283b6..25367bd6dd97 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1880,6 +1880,80 @@ int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr, } EXPORT_SYMBOL(ib_modify_qp_with_udata); +static void ib_get_width_and_speed(u32 netdev_speed, u32 lanes, + u16 *speed, u8 *width) +{ + if (!lanes) { + if (netdev_speed <= SPEED_1000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_SDR; + } else if (netdev_speed <= SPEED_10000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_FDR10; + } else if (netdev_speed <= SPEED_20000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_DDR; + } else if (netdev_speed <= SPEED_25000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_EDR; + } else if (netdev_speed <= SPEED_40000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_FDR10; + } else { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_EDR; + } + + return; + } + + switch (lanes) { + case 1: + *width = IB_WIDTH_1X; + break; + case 2: + *width = IB_WIDTH_2X; + break; + case 4: + *width = IB_WIDTH_4X; + break; + case 8: + *width = IB_WIDTH_8X; + break; + case 12: + *width = IB_WIDTH_12X; + break; + default: + *width = IB_WIDTH_1X; + } + + switch (netdev_speed / lanes) { + case SPEED_2500: + *speed = IB_SPEED_SDR; + break; + case SPEED_5000: + *speed = IB_SPEED_DDR; + break; + case SPEED_10000: + *speed = IB_SPEED_FDR10; + break; + case SPEED_14000: + *speed = IB_SPEED_FDR; + break; + case SPEED_25000: + *speed = IB_SPEED_EDR; + break; + case SPEED_50000: + *speed = IB_SPEED_HDR; + break; + case SPEED_100000: + *speed = IB_SPEED_NDR; + break; + default: + *speed = IB_SPEED_SDR; + } +} + int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width) { int rc; @@ -1904,29 +1978,13 @@ int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width) netdev_speed = lksettings.base.speed; } else { netdev_speed = SPEED_1000; - pr_warn("%s speed is unknown, defaulting to %u\n", netdev->name, - netdev_speed); + if (rc) + pr_warn("%s speed is 
unknown, defaulting to %u\n", + netdev->name, netdev_speed); } - if (netdev_speed <= SPEED_1000) { - *width = IB_WIDTH_1X; - *speed = IB_SPEED_SDR; - } else if (netdev_speed <= SPEED_10000) { - *width = IB_WIDTH_1X; - *speed = IB_SPEED_FDR10; - } else if (netdev_speed <= SPEED_20000) { - *width = IB_WIDTH_4X; - *speed = IB_SPEED_DDR; - } else if (netdev_speed <= SPEED_25000) { - *width = IB_WIDTH_1X; - *speed = IB_SPEED_EDR; - } else if (netdev_speed <= SPEED_40000) { - *width = IB_WIDTH_4X; - *speed = IB_SPEED_FDR10; - } else { - *width = IB_WIDTH_4X; - *speed = IB_SPEED_EDR; - } + ib_get_width_and_speed(netdev_speed, lksettings.lanes, + speed, width); return 0; } -- cgit From 3a8498720450174b8db450d3375a04dca81b3534 Mon Sep 17 00:00:00 2001 From: Sindhu Devale Date: Tue, 25 Jul 2023 10:55:24 -0500 Subject: RDMA/irdma: Allow accurate reporting on QP max send/recv WR Currently the attribute cap.max_send_wr and cap.max_recv_wr sent from user-space during create QP are the provider computed SQ/RQ depth as opposed to raw values passed from application. This inhibits computation of an accurate value for max_send_wr and max_recv_wr for this QP in the kernel which matches the value returned in user create QP. Also these capabilities needs to be reported from the driver in query QP. Add support by extending the ABI to allow the raw cap.max_send_wr and cap.max_recv_wr to be passed from user-space, while keeping compatibility for the older scheme. The internal HW depth and shift needed for the WQs needs to be computed now for both kernel and user-mode QPs. Add new helpers to assist with this: irdma_uk_calc_depth_shift_sq, irdma_uk_calc_depth_shift_rq and irdma_uk_calc_depth_shift_wq. Consolidate all the user mode QP setup into a new function irdma_setup_umode_qp which keeps it with its counterpart irdma_setup_kmode_qp. Signed-off-by: Youvaraj Sagar Signed-off-by: Sindhu Devale Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230725155525.1081-2-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/uk.c | 89 +++++++++++++++--- drivers/infiniband/hw/irdma/user.h | 10 ++ drivers/infiniband/hw/irdma/verbs.c | 182 ++++++++++++++++++++++-------------- drivers/infiniband/hw/irdma/verbs.h | 3 +- 4 files changed, 197 insertions(+), 87 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index dd428d915c17..fd337caa2e3b 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -1414,6 +1414,78 @@ static void irdma_setup_connection_wqes(struct irdma_qp_uk *qp, IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->initial_ring, move_cnt); } +/** + * irdma_uk_calc_shift_wq - calculate WQE shift for both SQ and RQ + * @ukinfo: qp initialization info + * @sq_shift: Returns shift of SQ + * @rq_shift: Returns shift of RQ + */ +void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift, + u8 *rq_shift) +{ + bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2; + + irdma_get_wqe_shift(ukinfo->uk_attrs, + imm_support ? ukinfo->max_sq_frag_cnt + 1 : + ukinfo->max_sq_frag_cnt, + ukinfo->max_inline_data, sq_shift); + + irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0, + rq_shift); + + if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) { + if (ukinfo->abi_ver > 4) + *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; + } +} + +/** + * irdma_uk_calc_depth_shift_sq - calculate depth and shift for SQ size. 
+ * @ukinfo: qp initialization info + * @sq_depth: Returns depth of SQ + * @sq_shift: Returns shift of SQ + */ +int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo, + u32 *sq_depth, u8 *sq_shift) +{ + bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2; + int status; + + irdma_get_wqe_shift(ukinfo->uk_attrs, + imm_support ? ukinfo->max_sq_frag_cnt + 1 : + ukinfo->max_sq_frag_cnt, + ukinfo->max_inline_data, sq_shift); + status = irdma_get_sqdepth(ukinfo->uk_attrs, ukinfo->sq_size, + *sq_shift, sq_depth); + + return status; +} + +/** + * irdma_uk_calc_depth_shift_rq - calculate depth and shift for RQ size. + * @ukinfo: qp initialization info + * @rq_depth: Returns depth of RQ + * @rq_shift: Returns shift of RQ + */ +int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo, + u32 *rq_depth, u8 *rq_shift) +{ + int status; + + irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0, + rq_shift); + + if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) { + if (ukinfo->abi_ver > 4) + *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; + } + + status = irdma_get_rqdepth(ukinfo->uk_attrs, ukinfo->rq_size, + *rq_shift, rq_depth); + + return status; +} + /** * irdma_uk_qp_init - initialize shared qp * @qp: hw qp (user and kernel) @@ -1428,23 +1500,12 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) { int ret_code = 0; u32 sq_ring_size; - u8 sqshift, rqshift; qp->uk_attrs = info->uk_attrs; if (info->max_sq_frag_cnt > qp->uk_attrs->max_hw_wq_frags || info->max_rq_frag_cnt > qp->uk_attrs->max_hw_wq_frags) return -EINVAL; - irdma_get_wqe_shift(qp->uk_attrs, info->max_rq_frag_cnt, 0, &rqshift); - if (qp->uk_attrs->hw_rev == IRDMA_GEN_1) { - irdma_get_wqe_shift(qp->uk_attrs, info->max_sq_frag_cnt, - info->max_inline_data, &sqshift); - if (info->abi_ver > 4) - rqshift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; - } else { - irdma_get_wqe_shift(qp->uk_attrs, info->max_sq_frag_cnt + 1, - info->max_inline_data, &sqshift); - } qp->qp_caps = info->qp_caps; qp->sq_base = info->sq; qp->rq_base = info->rq; @@ -1458,7 +1519,7 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) qp->sq_size = info->sq_size; qp->push_mode = false; qp->max_sq_frag_cnt = info->max_sq_frag_cnt; - sq_ring_size = qp->sq_size << sqshift; + sq_ring_size = qp->sq_size << info->sq_shift; IRDMA_RING_INIT(qp->sq_ring, sq_ring_size); IRDMA_RING_INIT(qp->initial_ring, sq_ring_size); if (info->first_sq_wq) { @@ -1473,9 +1534,9 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) qp->rq_size = info->rq_size; qp->max_rq_frag_cnt = info->max_rq_frag_cnt; qp->max_inline_data = info->max_inline_data; - qp->rq_wqe_size = rqshift; + qp->rq_wqe_size = info->rq_shift; IRDMA_RING_INIT(qp->rq_ring, qp->rq_size); - qp->rq_wqe_size_multiplier = 1 << rqshift; + qp->rq_wqe_size_multiplier = 1 << info->rq_shift; if (qp->uk_attrs->hw_rev == IRDMA_GEN_1) qp->wqe_ops = iw_wqe_uk_ops_gen_1; else diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index d0cdf609f5e0..1e0e1a71dbad 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -295,6 +295,12 @@ void irdma_uk_cq_init(struct irdma_cq_uk *cq, struct irdma_cq_uk_init_info *info); int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info); +void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift, + u8 *rq_shift); +int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo, + u32 
*sq_depth, u8 *sq_shift); +int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo, + u32 *rq_depth, u8 *rq_shift); struct irdma_sq_uk_wr_trk_info { u64 wrid; u32 wr_len; @@ -374,8 +380,12 @@ struct irdma_qp_uk_init_info { u32 max_sq_frag_cnt; u32 max_rq_frag_cnt; u32 max_inline_data; + u32 sq_depth; + u32 rq_depth; u8 first_sq_wq; u8 type; + u8 sq_shift; + u8 rq_shift; int abi_ver; bool legacy_mode; }; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index a1a42a7cd783..0187cff7b9c6 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -277,7 +277,7 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, struct irdma_alloc_ucontext_req req = {}; struct irdma_alloc_ucontext_resp uresp = {}; struct irdma_ucontext *ucontext = to_ucontext(uctx); - struct irdma_uk_attrs *uk_attrs; + struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; if (udata->inlen < IRDMA_ALLOC_UCTX_MIN_REQ_LEN || udata->outlen < IRDMA_ALLOC_UCTX_MIN_RESP_LEN) @@ -292,7 +292,9 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, ucontext->iwdev = iwdev; ucontext->abi_ver = req.userspace_ver; - uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; + if (req.comp_mask & IRDMA_ALLOC_UCTX_USE_RAW_ATTR) + ucontext->use_raw_attrs = true; + /* GEN_1 legacy support with libi40iw */ if (udata->outlen == IRDMA_ALLOC_UCTX_MIN_RESP_LEN) { if (uk_attrs->hw_rev != IRDMA_GEN_1) @@ -327,6 +329,7 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, uresp.max_hw_cq_size = uk_attrs->max_hw_cq_size; uresp.min_hw_cq_size = uk_attrs->min_hw_cq_size; uresp.hw_rev = uk_attrs->hw_rev; + uresp.comp_mask |= IRDMA_ALLOC_UCTX_USE_RAW_ATTR; if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen))) { rdma_user_mmap_entry_remove(ucontext->db_mmap_entry); @@ -566,6 +569,86 @@ static void irdma_setup_virt_qp(struct irdma_device *iwdev, } } +/** + * irdma_setup_umode_qp - setup sq and rq size in user mode qp + * @iwdev: iwarp device + * @iwqp: qp ptr (user or kernel) + * @info: initialize info to return + * @init_attr: Initial QP create attributes + */ +static int irdma_setup_umode_qp(struct ib_udata *udata, + struct irdma_device *iwdev, + struct irdma_qp *iwqp, + struct irdma_qp_init_info *info, + struct ib_qp_init_attr *init_attr) +{ + struct irdma_ucontext *ucontext = rdma_udata_to_drv_context(udata, + struct irdma_ucontext, ibucontext); + struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; + struct irdma_create_qp_req req; + unsigned long flags; + int ret; + + ret = ib_copy_from_udata(&req, udata, + min(sizeof(req), udata->inlen)); + if (ret) { + ibdev_dbg(&iwdev->ibdev, "VERBS: ib_copy_from_data fail\n"); + return ret; + } + + iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx; + iwqp->user_mode = 1; + if (req.user_wqe_bufs) { + info->qp_uk_init_info.legacy_mode = ucontext->legacy_mode; + spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); + iwqp->iwpbl = irdma_get_pbl((unsigned long)req.user_wqe_bufs, + &ucontext->qp_reg_mem_list); + spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); + + if (!iwqp->iwpbl) { + ret = -ENODATA; + ibdev_dbg(&iwdev->ibdev, "VERBS: no pbl info\n"); + return ret; + } + } + + if (!ucontext->use_raw_attrs) { + /** + * Maintain backward compat with older ABI which passes sq and + * rq depth in quanta in cap.max_send_wr and cap.max_recv_wr. + * There is no way to compute the correct value of + * iwqp->max_send_wr/max_recv_wr in the kernel. 
+ */ + iwqp->max_send_wr = init_attr->cap.max_send_wr; + iwqp->max_recv_wr = init_attr->cap.max_recv_wr; + ukinfo->sq_size = init_attr->cap.max_send_wr; + ukinfo->rq_size = init_attr->cap.max_recv_wr; + irdma_uk_calc_shift_wq(ukinfo, &ukinfo->sq_shift, + &ukinfo->rq_shift); + } else { + ret = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth, + &ukinfo->sq_shift); + if (ret) + return ret; + + ret = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth, + &ukinfo->rq_shift); + if (ret) + return ret; + + iwqp->max_send_wr = + (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift; + iwqp->max_recv_wr = + (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift; + ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift; + ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift; + } + + irdma_setup_virt_qp(iwdev, iwqp, info); + + return 0; +} + /** * irdma_setup_kmode_qp - setup initialization for kernel mode qp * @iwdev: iwarp device @@ -579,40 +662,28 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, struct ib_qp_init_attr *init_attr) { struct irdma_dma_mem *mem = &iwqp->kqp.dma_mem; - u32 sqdepth, rqdepth; - u8 sqshift, rqshift; u32 size; int status; struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; - struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; - irdma_get_wqe_shift(uk_attrs, - uk_attrs->hw_rev >= IRDMA_GEN_2 ? ukinfo->max_sq_frag_cnt + 1 : - ukinfo->max_sq_frag_cnt, - ukinfo->max_inline_data, &sqshift); - status = irdma_get_sqdepth(uk_attrs, ukinfo->sq_size, sqshift, - &sqdepth); + status = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth, + &ukinfo->sq_shift); if (status) return status; - if (uk_attrs->hw_rev == IRDMA_GEN_1) - rqshift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; - else - irdma_get_wqe_shift(uk_attrs, ukinfo->max_rq_frag_cnt, 0, - &rqshift); - - status = irdma_get_rqdepth(uk_attrs, ukinfo->rq_size, rqshift, - &rqdepth); + status = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth, + &ukinfo->rq_shift); if (status) return status; iwqp->kqp.sq_wrid_mem = - kcalloc(sqdepth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL); + kcalloc(ukinfo->sq_depth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL); if (!iwqp->kqp.sq_wrid_mem) return -ENOMEM; iwqp->kqp.rq_wrid_mem = - kcalloc(rqdepth, sizeof(*iwqp->kqp.rq_wrid_mem), GFP_KERNEL); + kcalloc(ukinfo->rq_depth, sizeof(*iwqp->kqp.rq_wrid_mem), GFP_KERNEL); + if (!iwqp->kqp.rq_wrid_mem) { kfree(iwqp->kqp.sq_wrid_mem); iwqp->kqp.sq_wrid_mem = NULL; @@ -622,7 +693,7 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, ukinfo->sq_wrtrk_array = iwqp->kqp.sq_wrid_mem; ukinfo->rq_wrid_array = iwqp->kqp.rq_wrid_mem; - size = (sqdepth + rqdepth) * IRDMA_QP_WQE_MIN_SIZE; + size = (ukinfo->sq_depth + ukinfo->rq_depth) * IRDMA_QP_WQE_MIN_SIZE; size += (IRDMA_SHADOW_AREA_SIZE << 3); mem->size = ALIGN(size, 256); @@ -638,16 +709,19 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, ukinfo->sq = mem->va; info->sq_pa = mem->pa; - ukinfo->rq = &ukinfo->sq[sqdepth]; - info->rq_pa = info->sq_pa + (sqdepth * IRDMA_QP_WQE_MIN_SIZE); - ukinfo->shadow_area = ukinfo->rq[rqdepth].elem; - info->shadow_area_pa = info->rq_pa + (rqdepth * IRDMA_QP_WQE_MIN_SIZE); - ukinfo->sq_size = sqdepth >> sqshift; - ukinfo->rq_size = rqdepth >> rqshift; + ukinfo->rq = &ukinfo->sq[ukinfo->sq_depth]; + info->rq_pa = info->sq_pa + (ukinfo->sq_depth * IRDMA_QP_WQE_MIN_SIZE); + ukinfo->shadow_area = ukinfo->rq[ukinfo->rq_depth].elem; + info->shadow_area_pa = + info->rq_pa + (ukinfo->rq_depth * 
IRDMA_QP_WQE_MIN_SIZE); + ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift; + ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift; ukinfo->qp_id = iwqp->ibqp.qp_num; - init_attr->cap.max_send_wr = (sqdepth - IRDMA_SQ_RSVD) >> sqshift; - init_attr->cap.max_recv_wr = (rqdepth - IRDMA_RQ_RSVD) >> rqshift; + iwqp->max_send_wr = (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift; + iwqp->max_recv_wr = (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift; + init_attr->cap.max_send_wr = iwqp->max_send_wr; + init_attr->cap.max_recv_wr = iwqp->max_recv_wr; return 0; } @@ -803,18 +877,14 @@ static int irdma_create_qp(struct ib_qp *ibqp, struct irdma_device *iwdev = to_iwdev(ibpd->device); struct irdma_pci_f *rf = iwdev->rf; struct irdma_qp *iwqp = to_iwqp(ibqp); - struct irdma_create_qp_req req = {}; struct irdma_create_qp_resp uresp = {}; u32 qp_num = 0; int err_code; - int sq_size; - int rq_size; struct irdma_sc_qp *qp; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs; struct irdma_qp_init_info init_info = {}; struct irdma_qp_host_ctx_info *ctx_info; - unsigned long flags; err_code = irdma_validate_qp_attrs(init_attr, iwdev); if (err_code) @@ -824,13 +894,10 @@ static int irdma_create_qp(struct ib_qp *ibqp, udata->outlen < IRDMA_CREATE_QP_MIN_RESP_LEN)) return -EINVAL; - sq_size = init_attr->cap.max_send_wr; - rq_size = init_attr->cap.max_recv_wr; - init_info.vsi = &iwdev->vsi; init_info.qp_uk_init_info.uk_attrs = uk_attrs; - init_info.qp_uk_init_info.sq_size = sq_size; - init_info.qp_uk_init_info.rq_size = rq_size; + init_info.qp_uk_init_info.sq_size = init_attr->cap.max_send_wr; + init_info.qp_uk_init_info.rq_size = init_attr->cap.max_recv_wr; init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge; init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge; init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data; @@ -880,36 +947,9 @@ static int irdma_create_qp(struct ib_qp *ibqp, init_waitqueue_head(&iwqp->mod_qp_waitq); if (udata) { - err_code = ib_copy_from_udata(&req, udata, - min(sizeof(req), udata->inlen)); - if (err_code) { - ibdev_dbg(&iwdev->ibdev, - "VERBS: ib_copy_from_data fail\n"); - goto error; - } - - iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx; - iwqp->user_mode = 1; - if (req.user_wqe_bufs) { - struct irdma_ucontext *ucontext = - rdma_udata_to_drv_context(udata, - struct irdma_ucontext, - ibucontext); - - init_info.qp_uk_init_info.legacy_mode = ucontext->legacy_mode; - spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); - iwqp->iwpbl = irdma_get_pbl((unsigned long)req.user_wqe_bufs, - &ucontext->qp_reg_mem_list); - spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); - - if (!iwqp->iwpbl) { - err_code = -ENODATA; - ibdev_dbg(&iwdev->ibdev, "VERBS: no pbl info\n"); - goto error; - } - } init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver; - irdma_setup_virt_qp(iwdev, iwqp, &init_info); + err_code = irdma_setup_umode_qp(udata, iwdev, iwqp, &init_info, + init_attr); } else { INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker); init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER; @@ -964,8 +1004,6 @@ static int irdma_create_qp(struct ib_qp *ibqp, spin_lock_init(&iwqp->sc_qp.pfpdu.lock); iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 
1 : 0; rf->qp_table[qp_num] = iwqp; - iwqp->max_send_wr = sq_size; - iwqp->max_recv_wr = rq_size; if (rdma_protocol_roce(&iwdev->ibdev, 1)) { if (dev->ws_add(&iwdev->vsi, 0)) { @@ -986,8 +1024,8 @@ static int irdma_create_qp(struct ib_qp *ibqp, if (rdma_protocol_iwarp(&iwdev->ibdev, 1)) uresp.lsmm = 1; } - uresp.actual_sq_size = sq_size; - uresp.actual_rq_size = rq_size; + uresp.actual_sq_size = init_info.qp_uk_init_info.sq_size; + uresp.actual_rq_size = init_info.qp_uk_init_info.rq_size; uresp.qp_id = qp_num; uresp.qp_caps = qp->qp_uk.qp_caps; diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index 9de7217df357..5d7b983f47a2 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -18,7 +18,8 @@ struct irdma_ucontext { struct list_head qp_reg_mem_list; spinlock_t qp_reg_mem_list_lock; /* protect QP memory list */ int abi_ver; - bool legacy_mode; + u8 legacy_mode : 1; + u8 use_raw_attrs : 1; }; struct irdma_pd { -- cgit From 72d422c2465e93d5de622173f04d666cb9854c5f Mon Sep 17 00:00:00 2001 From: Sindhu Devale Date: Tue, 25 Jul 2023 10:55:25 -0500 Subject: RDMA/irdma: Use HW specific minimum WQ size HW GEN1 and GEN2 have different min WQ sizes but they are currently set to the same value. Use a gen specific attribute min_hw_wq_size and extend ABI to pass it to user-space. Signed-off-by: Sindhu Devale Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230725155525.1081-3-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/i40iw_hw.c | 1 + drivers/infiniband/hw/irdma/i40iw_hw.h | 2 +- drivers/infiniband/hw/irdma/icrdma_hw.c | 1 + drivers/infiniband/hw/irdma/icrdma_hw.h | 1 + drivers/infiniband/hw/irdma/irdma.h | 1 + drivers/infiniband/hw/irdma/uk.c | 12 ++++++++---- drivers/infiniband/hw/irdma/user.h | 1 + drivers/infiniband/hw/irdma/verbs.c | 2 ++ 8 files changed, 16 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.c b/drivers/infiniband/hw/irdma/i40iw_hw.c index 37a40fb4d0d7..638d127fb3e0 100644 --- a/drivers/infiniband/hw/irdma/i40iw_hw.c +++ b/drivers/infiniband/hw/irdma/i40iw_hw.c @@ -254,5 +254,6 @@ void i40iw_init_hw(struct irdma_sc_dev *dev) dev->hw_attrs.max_stat_idx = IRDMA_HW_STAT_INDEX_MAX_GEN_1; dev->hw_attrs.max_hw_outbound_msg_size = I40IW_MAX_OUTBOUND_MSG_SIZE; dev->hw_attrs.max_hw_inbound_msg_size = I40IW_MAX_INBOUND_MSG_SIZE; + dev->hw_attrs.uk_attrs.min_hw_wq_size = I40IW_MIN_WQ_SIZE; dev->hw_attrs.max_qp_wr = I40IW_MAX_QP_WRS; } diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.h b/drivers/infiniband/hw/irdma/i40iw_hw.h index 1c438b3593ea..10afc165f5ea 100644 --- a/drivers/infiniband/hw/irdma/i40iw_hw.h +++ b/drivers/infiniband/hw/irdma/i40iw_hw.h @@ -140,11 +140,11 @@ enum i40iw_device_caps_const { I40IW_MAX_CQ_SIZE = 1048575, I40IW_MAX_OUTBOUND_MSG_SIZE = 2147483647, I40IW_MAX_INBOUND_MSG_SIZE = 2147483647, + I40IW_MIN_WQ_SIZE = 4 /* WQEs */, }; #define I40IW_QP_WQE_MIN_SIZE 32 #define I40IW_QP_WQE_MAX_SIZE 128 -#define I40IW_QP_SW_MIN_WQSIZE 4 #define I40IW_MAX_RQ_WQE_SHIFT 2 #define I40IW_MAX_QUANTA_PER_WR 2 diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.c b/drivers/infiniband/hw/irdma/icrdma_hw.c index 298d14905993..10ccf4bc3f2d 100644 --- a/drivers/infiniband/hw/irdma/icrdma_hw.c +++ b/drivers/infiniband/hw/irdma/icrdma_hw.c @@ -195,6 +195,7 @@ void icrdma_init_hw(struct irdma_sc_dev *dev) dev->hw_attrs.max_stat_inst = ICRDMA_MAX_STATS_COUNT; dev->hw_attrs.max_stat_idx = 
IRDMA_HW_STAT_INDEX_MAX_GEN_2; + dev->hw_attrs.uk_attrs.min_hw_wq_size = ICRDMA_MIN_WQ_SIZE; dev->hw_attrs.uk_attrs.max_hw_sq_chunk = IRDMA_MAX_QUANTA_PER_WR; dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_RTS_AE | IRDMA_FEATURE_CQ_RESIZE; diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.h b/drivers/infiniband/hw/irdma/icrdma_hw.h index b65c463abf0b..54035a08cc93 100644 --- a/drivers/infiniband/hw/irdma/icrdma_hw.h +++ b/drivers/infiniband/hw/irdma/icrdma_hw.h @@ -64,6 +64,7 @@ enum icrdma_device_caps_const { ICRDMA_MAX_IRD_SIZE = 127, ICRDMA_MAX_ORD_SIZE = 255, + ICRDMA_MIN_WQ_SIZE = 8 /* WQEs */, }; diff --git a/drivers/infiniband/hw/irdma/irdma.h b/drivers/infiniband/hw/irdma/irdma.h index 173e2dc2fc35..3237fa64bc8f 100644 --- a/drivers/infiniband/hw/irdma/irdma.h +++ b/drivers/infiniband/hw/irdma/irdma.h @@ -119,6 +119,7 @@ struct irdma_uk_attrs { u32 min_hw_cq_size; u32 max_hw_cq_size; u16 max_hw_sq_chunk; + u16 min_hw_wq_size; u8 hw_rev; }; diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index fd337caa2e3b..ac650a784245 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -1349,10 +1349,12 @@ void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge, int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *sqdepth) { + u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift; + *sqdepth = irdma_qp_round_up((sq_size << shift) + IRDMA_SQ_RSVD); - if (*sqdepth < (IRDMA_QP_SW_MIN_WQSIZE << shift)) - *sqdepth = IRDMA_QP_SW_MIN_WQSIZE << shift; + if (*sqdepth < min_size) + *sqdepth = min_size; else if (*sqdepth > uk_attrs->max_hw_wq_quanta) return -EINVAL; @@ -1369,10 +1371,12 @@ int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, u32 *rqdepth) { + u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift; + *rqdepth = irdma_qp_round_up((rq_size << shift) + IRDMA_RQ_RSVD); - if (*rqdepth < (IRDMA_QP_SW_MIN_WQSIZE << shift)) - *rqdepth = IRDMA_QP_SW_MIN_WQSIZE << shift; + if (*rqdepth < min_size) + *rqdepth = min_size; else if (*rqdepth > uk_attrs->max_hw_rq_quanta) return -EINVAL; diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index 1e0e1a71dbad..dd145ec72a91 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -85,6 +85,7 @@ enum irdma_device_caps_const { IRDMA_Q2_BUF_SIZE = 256, IRDMA_QP_CTX_SIZE = 256, IRDMA_MAX_PDS = 262144, + IRDMA_MIN_WQ_SIZE_GEN2 = 8, }; enum irdma_addressing_type { diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 0187cff7b9c6..b9420b0c42b3 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -330,6 +330,8 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, uresp.min_hw_cq_size = uk_attrs->min_hw_cq_size; uresp.hw_rev = uk_attrs->hw_rev; uresp.comp_mask |= IRDMA_ALLOC_UCTX_USE_RAW_ATTR; + uresp.min_hw_wq_size = uk_attrs->min_hw_wq_size; + uresp.comp_mask |= IRDMA_ALLOC_UCTX_MIN_HW_WQ_SIZE; if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen))) { rdma_user_mmap_entry_remove(ucontext->db_mmap_entry); -- cgit From a45e5f1859579f88df624997c38c05706f9015e3 Mon Sep 17 00:00:00 2001 From: Ruan Jinjie Date: Fri, 28 Jul 2023 14:51:39 +0800 Subject: RDMA/mlx: Remove unnecessary variable initializations Remove unnecessary variable initializations. 
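The pattern being removed is illustrated by the minimal userspace analogue below (not code from these drivers): the "= NULL" initializer is dead because the pointer is unconditionally assigned before its first use.

  #include <stdlib.h>

  static int demo(void)
  {
          int *p;                         /* no "= NULL" needed here ...   */

          p = calloc(1, sizeof(*p));      /* ... it is assigned right away */
          if (!p)
                  return -1;
          free(p);
          return 0;
  }

  int main(void)
  {
          return demo();
  }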
Signed-off-by: Ruan Jinjie Link: https://lore.kernel.org/r/20230728065139.3411703-1-ruanjinjie@huawei.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx4/main.c | 44 +++++++++++++++++++-------------------- drivers/infiniband/hw/mlx5/mad.c | 40 +++++++++++++++++------------------ 2 files changed, 42 insertions(+), 42 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index b18e9f2adc82..216aacd72e4f 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -132,7 +132,7 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, if (dev) { if (mlx4_is_bonded(ibdev->dev)) { - struct net_device *upper = NULL; + struct net_device *upper; upper = netdev_master_upper_dev_get_rcu(dev); if (upper) { @@ -254,7 +254,7 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) int ret = 0; int hw_update = 0; int i; - struct gid_entry *gids = NULL; + struct gid_entry *gids; u16 vlan_id = 0xffff; u8 mac[ETH_ALEN]; @@ -345,7 +345,7 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context) struct mlx4_port_gid_table *port_gid_table; int ret = 0; int hw_update = 0; - struct gid_entry *gids = NULL; + struct gid_entry *gids; if (!rdma_cap_roce_gid_table(attr->device, attr->port_num)) return -EINVAL; @@ -431,8 +431,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, struct ib_udata *uhw) { struct mlx4_ib_dev *dev = to_mdev(ibdev); - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err; int have_ib_ports; struct mlx4_uverbs_ex_query_device cmd; @@ -649,8 +649,8 @@ mlx4_ib_port_link_layer(struct ib_device *device, u32 port_num) static int ib_link_query_port(struct ib_device *ibdev, u32 port, struct ib_port_attr *props, int netw_view) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int ext_active_speed; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; @@ -827,8 +827,8 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u32 port, int __mlx4_ib_query_gid(struct ib_device *ibdev, u32 port, int index, union ib_gid *gid, int netw_view) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; struct mlx4_ib_dev *dev = to_mdev(ibdev); int clear = 0; @@ -892,8 +892,8 @@ static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u32 port, u64 *sl2vl_tbl) { union sl2vl_tbl_to_u64 sl2vl64; - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; int jj; @@ -952,8 +952,8 @@ static void mlx4_init_sl2vl_tbl(struct mlx4_ib_dev *mdev) int __mlx4_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey, int netw_view) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; @@ -1968,8 +1968,8 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) static int init_node_data(struct mlx4_ib_dev *dev) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; @@ -2621,7 +2621,7 @@ static void 
*mlx4_ib_add(struct mlx4_dev *dev) int num_req_counters; int allocated; u32 counter_index; - struct counter_index *new_counter_index = NULL; + struct counter_index *new_counter_index; pr_info_once("%s", mlx4_ib_version); @@ -2923,7 +2923,7 @@ int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, { int err; size_t flow_size; - struct ib_flow_attr *flow = NULL; + struct ib_flow_attr *flow; struct ib_flow_spec_ib *ib_spec; if (is_attach) { @@ -2943,11 +2943,11 @@ int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, err = __mlx4_ib_create_flow(&mqp->ibqp, flow, MLX4_DOMAIN_NIC, MLX4_FS_REGULAR, &mqp->reg_id); - } else { - err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id); + kfree(flow); + return err; } - kfree(flow); - return err; + + return __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id); } static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) @@ -2992,7 +2992,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) { - struct mlx4_ib_demux_work **dm = NULL; + struct mlx4_ib_demux_work **dm; struct mlx4_dev *dev = ibdev->dev; int i; unsigned long flags; diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 9c8a7b206dcf..8102ef113b7e 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -308,8 +308,8 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num, int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; u16 packet_error; @@ -338,8 +338,8 @@ out: static int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, struct ib_smp *out_mad) { - struct ib_smp *in_mad = NULL; - int err = -ENOMEM; + struct ib_smp *in_mad; + int err; in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); if (!in_mad) @@ -358,8 +358,8 @@ static int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev, __be64 *sys_image_guid) { - struct ib_smp *out_mad = NULL; - int err = -ENOMEM; + struct ib_smp *out_mad; + int err; out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); if (!out_mad) @@ -380,8 +380,8 @@ out: int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev, u16 *max_pkeys) { - struct ib_smp *out_mad = NULL; - int err = -ENOMEM; + struct ib_smp *out_mad; + int err; out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); if (!out_mad) @@ -402,8 +402,8 @@ out: int mlx5_query_mad_ifc_vendor_id(struct ib_device *ibdev, u32 *vendor_id) { - struct ib_smp *out_mad = NULL; - int err = -ENOMEM; + struct ib_smp *out_mad; + int err; out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); if (!out_mad) @@ -423,8 +423,8 @@ out: int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); @@ -448,8 +448,8 @@ out: int mlx5_query_mad_ifc_node_guid(struct mlx5_ib_dev *dev, __be64 *node_guid) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); @@ -474,8 +474,8 @@ out: int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey) { - struct 
ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); @@ -503,8 +503,8 @@ out: int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u32 port, int index, union ib_gid *gid) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); @@ -545,8 +545,8 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u32 port, { struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int ext_active_speed; int err = -ENOMEM; -- cgit From 91f36237b4b9bdce7610c7450a906d46704a566a Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Fri, 28 Jul 2023 13:44:18 +0200 Subject: RDMA/siw: Fix tx thread initialization. Immediately removing the siw module after insertion may crash in siw_stop_tx_thread(), if the according thread did not yet had a chance to initialize its wait queue and siw_stop_tx_thread() tries to wakeup that thread. Initializing the threads state before spwaning it fixes it. Reported-by: Guoqing Jiang Signed-off-by: Bernard Metzler Link: https://lore.kernel.org/r/20230728114418.124328-1-bmt@zurich.ibm.com Tested-by: Guoqing Jiang Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw.h | 3 ++- drivers/infiniband/sw/siw/siw_main.c | 40 +++---------------------------- drivers/infiniband/sw/siw/siw_qp_tx.c | 44 ++++++++++++++++++++++++++++++----- 3 files changed, 43 insertions(+), 44 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 8b4a710b82bc..58dddb143b9f 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -531,11 +531,12 @@ void siw_qp_llp_data_ready(struct sock *sk); void siw_qp_llp_write_space(struct sock *sk); /* QP TX path functions */ +int siw_create_tx_threads(void); +void siw_stop_tx_threads(void); int siw_run_sq(void *arg); int siw_qp_sq_process(struct siw_qp *qp); int siw_sq_start(struct siw_qp *qp); int siw_activate_tx(struct siw_qp *qp); -void siw_stop_tx_thread(int nr_cpu); int siw_get_tx_cpu(struct siw_device *sdev); void siw_put_tx_cpu(int cpu); diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index f45600d169ae..d4b6e0106851 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -87,29 +87,6 @@ static void siw_device_cleanup(struct ib_device *base_dev) xa_destroy(&sdev->mem_xa); } -static int siw_create_tx_threads(void) -{ - int cpu, assigned = 0; - - for_each_online_cpu(cpu) { - /* Skip HT cores */ - if (cpu % cpumask_weight(topology_sibling_cpumask(cpu))) - continue; - - siw_tx_thread[cpu] = - kthread_run_on_cpu(siw_run_sq, - (unsigned long *)(long)cpu, - cpu, "siw_tx/%u"); - if (IS_ERR(siw_tx_thread[cpu])) { - siw_tx_thread[cpu] = NULL; - continue; - } - - assigned++; - } - return assigned; -} - static int siw_dev_qualified(struct net_device *netdev) { /* @@ -529,7 +506,6 @@ static struct rdma_link_ops siw_link_ops = { static __init int siw_init_module(void) { int rv; - int nr_cpu; if (SENDPAGE_THRESH < SIW_MAX_INLINE) { pr_info("siw: sendpage threshold too small: %u\n", @@ -574,12 +550,8 @@ static __init int siw_init_module(void) return 0; out_error: - for (nr_cpu = 0; nr_cpu < nr_cpu_ids; 
nr_cpu++) { - if (siw_tx_thread[nr_cpu]) { - siw_stop_tx_thread(nr_cpu); - siw_tx_thread[nr_cpu] = NULL; - } - } + siw_stop_tx_threads(); + if (siw_crypto_shash) crypto_free_shash(siw_crypto_shash); @@ -593,14 +565,8 @@ out_error: static void __exit siw_exit_module(void) { - int cpu; + siw_stop_tx_threads(); - for_each_possible_cpu(cpu) { - if (siw_tx_thread[cpu]) { - siw_stop_tx_thread(cpu); - siw_tx_thread[cpu] = NULL; - } - } unregister_netdevice_notifier(&siw_netdev_nb); rdma_link_unregister(&siw_link_ops); ib_unregister_driver(RDMA_DRIVER_SIW); diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index 7c7a51d36d0c..3ff339eceec3 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -1208,10 +1208,45 @@ struct tx_task_t { static DEFINE_PER_CPU(struct tx_task_t, siw_tx_task_g); -void siw_stop_tx_thread(int nr_cpu) +int siw_create_tx_threads(void) { - kthread_stop(siw_tx_thread[nr_cpu]); - wake_up(&per_cpu(siw_tx_task_g, nr_cpu).waiting); + int cpu, assigned = 0; + + for_each_online_cpu(cpu) { + struct tx_task_t *tx_task; + + /* Skip HT cores */ + if (cpu % cpumask_weight(topology_sibling_cpumask(cpu))) + continue; + + tx_task = &per_cpu(siw_tx_task_g, cpu); + init_llist_head(&tx_task->active); + init_waitqueue_head(&tx_task->waiting); + + siw_tx_thread[cpu] = + kthread_run_on_cpu(siw_run_sq, + (unsigned long *)(long)cpu, + cpu, "siw_tx/%u"); + if (IS_ERR(siw_tx_thread[cpu])) { + siw_tx_thread[cpu] = NULL; + continue; + } + assigned++; + } + return assigned; +} + +void siw_stop_tx_threads(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + if (siw_tx_thread[cpu]) { + kthread_stop(siw_tx_thread[cpu]); + wake_up(&per_cpu(siw_tx_task_g, cpu).waiting); + siw_tx_thread[cpu] = NULL; + } + } } int siw_run_sq(void *data) @@ -1221,9 +1256,6 @@ int siw_run_sq(void *data) struct siw_qp *qp; struct tx_task_t *tx_task = &per_cpu(siw_tx_task_g, nr_cpu); - init_llist_head(&tx_task->active); - init_waitqueue_head(&tx_task->waiting); - while (1) { struct llist_node *fifo_list = NULL; -- cgit From d43ea9c3d52f2e8ab97faa0a9349b990acfa4b61 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Mon, 31 Jul 2023 09:59:15 +0800 Subject: RDMA/irdma: Fix one kernel-doc comment Remove description of @free_hwcqp in irdma_destroy_cqp(). 
to silence the warning: drivers/infiniband/hw/irdma/hw.c:580: warning: Excess function parameter 'free_hwcqp' description in 'irdma_destroy_cqp' Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=6028 Signed-off-by: Yang Li Link: https://lore.kernel.org/r/20230731015915.34867-1-yang.lee@linux.alibaba.com Reviewed-by: Randy Dunlap Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/hw.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 8519495d23ce..9f5d418c6e4a 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -570,7 +570,6 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf, /** * irdma_destroy_cqp - destroy control qp * @rf: RDMA PCI function - * @free_hwcqp: 1 if hw cqp should be freed * * Issue destroy cqp request and * free the resources associated with the cqp -- cgit From 50f338cd8847053283c82f73129ba90c08dad06c Mon Sep 17 00:00:00 2001 From: Ruan Jinjie Date: Mon, 31 Jul 2023 14:55:43 +0800 Subject: RDMA/mthca: Remove unnecessary NULL assignments There are many pointers assigned first, which need not to be initialized, so remove the NULL assignments. Signed-off-by: Ruan Jinjie Link: https://lore.kernel.org/r/20230731065543.2285928-1-ruanjinjie@huawei.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mthca/mthca_provider.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index c46df53f26cf..e1325f2927d6 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -53,8 +53,8 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; struct mthca_dev *mdev = to_mdev(ibdev); @@ -121,8 +121,8 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *pr static int mthca_query_port(struct ib_device *ibdev, u32 port, struct ib_port_attr *props) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); @@ -217,8 +217,8 @@ out: static int mthca_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); @@ -246,8 +246,8 @@ static int mthca_query_pkey(struct ib_device *ibdev, static int mthca_query_gid(struct ib_device *ibdev, u32 port, int index, union ib_gid *gid) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); @@ -989,8 +989,8 @@ static const struct attribute_group mthca_attr_group = { static int mthca_init_node_data(struct mthca_dev *dev) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); -- cgit From f0ff2a2dd08df50656961c26c06c7091e3792123 Mon Sep 17 00:00:00 2001 From: Shetu Ayalew Date: Sun, 23 
Jul 2023 17:21:14 +0300 Subject: IB/mlx5: Add HW counter called rx_dct_connect The rx_dct_connect counter shows the number of received connection requests for the associated DCTs. Signed-off-by: Shetu Ayalew Reviewed-by: Maor Gottlieb Link: https://lore.kernel.org/r/01cd24cd7f591734741309921fdc01fc770d84a8.1690121941.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/counters.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 93257fa5aae8..8300ce622835 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -27,6 +27,7 @@ static const struct mlx5_ib_counter basic_q_cnts[] = { INIT_Q_COUNTER(rx_write_requests), INIT_Q_COUNTER(rx_read_requests), INIT_Q_COUNTER(rx_atomic_requests), + INIT_Q_COUNTER(rx_dct_connect), INIT_Q_COUNTER(out_of_buffer), }; @@ -46,6 +47,7 @@ static const struct mlx5_ib_counter vport_basic_q_cnts[] = { INIT_VPORT_Q_COUNTER(rx_write_requests), INIT_VPORT_Q_COUNTER(rx_read_requests), INIT_VPORT_Q_COUNTER(rx_atomic_requests), + INIT_VPORT_Q_COUNTER(rx_dct_connect), INIT_VPORT_Q_COUNTER(out_of_buffer), }; -- cgit From 272bba19d631e21e47f6ffa5654d3c17c57ea2ac Mon Sep 17 00:00:00 2001 From: Ruan Jinjie Date: Mon, 31 Jul 2023 16:51:18 +0800 Subject: RDMA: Remove unnecessary ternary operators There are a little ternary operators, the true or false judgment of which is unnecessary in C language semantics. Signed-off-by: Ruan Jinjie Link: https://lore.kernel.org/r/20230731085118.394443-1-ruanjinjie@huawei.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/netlink.c | 2 +- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 5 ++--- drivers/infiniband/hw/bnxt_re/main.c | 7 +++---- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 3 +-- drivers/infiniband/hw/hns/hns_roce_hem.c | 2 +- drivers/infiniband/hw/irdma/uk.c | 12 ++++++------ drivers/infiniband/hw/irdma/verbs.c | 5 ++--- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- drivers/infiniband/hw/qedr/verbs.c | 2 +- 9 files changed, 18 insertions(+), 22 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 1b2cc9e45ade..ae2db0c70788 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -75,7 +75,7 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op) if (type >= RDMA_NL_NUM_CLIENTS) return false; - return (op < max_num_ops[type]) ? true : false; + return op < max_num_ops[type]; } static const struct rdma_nl_cbs * diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 2b2505ad103d..c46fd2a47c95 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1336,8 +1336,7 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, qplqp->pd = &pd->qplib_pd; qplqp->qp_handle = (u64)qplqp; qplqp->max_inline_data = init_attr->cap.max_inline_data; - qplqp->sig_type = ((init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? - true : false); + qplqp->sig_type = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; qptype = bnxt_re_init_qp_type(rdev, init_attr); if (qptype < 0) { rc = qptype; @@ -2261,7 +2260,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, } is_eth = true; - is_vlan = (vlan_id && (vlan_id < 0x1000)) ? 
true : false; + is_vlan = vlan_id && (vlan_id < 0x1000); ib_ud_header_init(payload_size, !is_eth, is_eth, is_vlan, is_grh, ip_version, is_udp, 0, &qp->qp1_hdr); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 87960ac42084..d658e6798bbf 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -433,8 +433,8 @@ int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev) cctx->modes.db_push = le32_to_cpu(resp.flags) & FUNC_QCAPS_RESP_FLAGS_WCB_PUSH_MODE; cctx->modes.dbr_pacing = - le32_to_cpu(resp.flags_ext2) & FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_EXT_SUPPORTED ? - true : false; + le32_to_cpu(resp.flags_ext2) & + FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_EXT_SUPPORTED; return 0; } @@ -1333,8 +1333,7 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) */ if ((prio_map == 0 && rdev->qplib_res.prio) || (prio_map != 0 && !rdev->qplib_res.prio)) { - rdev->qplib_res.prio = prio_map ? true : false; - + rdev->qplib_res.prio = prio_map; bnxt_re_update_gid(rdev); } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 91aed77ce40d..f9dee0d2da9c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -1373,8 +1373,7 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) qp->state = sb->en_sqd_async_notify_state & CREQ_QUERY_QP_RESP_SB_STATE_MASK; qp->en_sqd_async_notify = sb->en_sqd_async_notify_state & - CREQ_QUERY_QP_RESP_SB_EN_SQD_ASYNC_NOTIFY ? - true : false; + CREQ_QUERY_QP_RESP_SB_EN_SQD_ASYNC_NOTIFY; qp->access = sb->access; qp->pkey_index = le16_to_cpu(sb->pkey); qp->qkey = le32_to_cpu(sb->qkey); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 47c0efed1821..c4ac06a33869 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -78,7 +78,7 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) return false; } - return hop_num ? true : false; + return hop_num; } static bool hns_roce_check_hem_null(struct hns_roce_hem **hem, u64 hem_idx, diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index ac650a784245..6f9238c4fe20 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -282,7 +282,7 @@ int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool read_fence = false; u16 quanta; - info->push_wqe = qp->push_db ? true : false; + info->push_wqe = qp->push_db; op_info = &info->op.rdma_write; if (op_info->num_lo_sges > qp->max_sq_frag_cnt) @@ -383,7 +383,7 @@ int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, u16 quanta; u64 hdr; - info->push_wqe = qp->push_db ? true : false; + info->push_wqe = qp->push_db; op_info = &info->op.rdma_read; if (qp->max_sq_frag_cnt < op_info->num_lo_sges) @@ -468,7 +468,7 @@ int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool read_fence = false; u16 quanta; - info->push_wqe = qp->push_db ? true : false; + info->push_wqe = qp->push_db; op_info = &info->op.send; if (qp->max_sq_frag_cnt < op_info->num_sges) @@ -720,7 +720,7 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, u32 i, total_size = 0; u16 quanta; - info->push_wqe = qp->push_db ? 
true : false; + info->push_wqe = qp->push_db; op_info = &info->op.rdma_write; if (unlikely(qp->max_sq_frag_cnt < op_info->num_lo_sges)) @@ -794,7 +794,7 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp, u32 i, total_size = 0; u16 quanta; - info->push_wqe = qp->push_db ? true : false; + info->push_wqe = qp->push_db; op_info = &info->op.send; if (unlikely(qp->max_sq_frag_cnt < op_info->num_sges)) @@ -872,7 +872,7 @@ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, bool local_fence = false; struct ib_sge sge = {}; - info->push_wqe = qp->push_db ? true : false; + info->push_wqe = qp->push_db; op_info = &info->op.inv_local_stag; local_fence = info->local_fence; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index b9420b0c42b3..0ca5b88d82e8 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1004,7 +1004,7 @@ static int irdma_create_qp(struct ib_qp *ibqp, refcount_set(&iwqp->refcnt, 1); spin_lock_init(&iwqp->lock); spin_lock_init(&iwqp->sc_qp.pfpdu.lock); - iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0; + iwqp->sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; rf->qp_table[qp_num] = iwqp; if (rdma_protocol_roce(&iwdev->ibdev, 1)) { @@ -3547,8 +3547,7 @@ static void irdma_process_cqe(struct ib_wc *entry, set_ib_wc_op_sq(cq_poll_info, entry); } else { set_ib_wc_op_rq(cq_poll_info, entry, - qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM ? - true : false); + qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM); if (qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_UD && cq_poll_info->stag_invalid_set) { entry->ex.invalidate_rkey = cq_poll_info->inv_stag; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 58f994341e9a..c849fdbd4c99 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1277,7 +1277,7 @@ static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp, qp->sq.max_sges = attrs->cap.max_send_sge; qp->rq.max_sges = attrs->cap.max_recv_sge; qp->state = OCRDMA_QPS_RST; - qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false; + qp->signaled = attrs->sq_sig_type == IB_SIGNAL_ALL_WR; } static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev, diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index d745ce9dc88a..7887a6786ed4 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1358,7 +1358,7 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, qp->prev_wqe_size = 0; - qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false; + qp->signaled = attrs->sq_sig_type == IB_SIGNAL_ALL_WR; qp->dev = dev; if (qedr_qp_has_sq(qp)) { qedr_reset_qp_hwq_info(&qp->sq); -- cgit From e0ba8ff46704fc924e2ef0451ba196cbdc0d68f2 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Tue, 20 Jun 2023 08:55:19 -0500 Subject: RDMA/rxe: Move work queue code to subroutines This patch: - Moves code to initialize a qp send work queue to a subroutine named rxe_init_sq. - Moves code to initialize a qp recv work queue to a subroutine named rxe_init_rq. - Moves initialization of qp request and response packet queues ahead of work queue initialization so that cleanup of a qp if it is not fully completed can successfully attempt to drain the packet queues without a seg fault. - Makes minor whitespace cleanups. 
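The reordering in the third bullet is the important one. The standalone sketch below, with illustrative types and names rather than the driver's own, shows why the packet queue must be made valid before any step that can fail, so that error-path cleanup can always drain it:

  #include <stdio.h>

  struct pkt_queue { int inited; int len; };

  static void drain(struct pkt_queue *q)
  {
          if (q->inited)
                  q->len = 0;     /* cleanup is safe once the queue exists */
  }

  static int init_req(struct pkt_queue *q, int fail)
  {
          *q = (struct pkt_queue){ .inited = 1 }; /* queue valid first ... */
          if (fail)
                  return -1;      /* ... so a later failure is harmless    */
          return 0;
  }

  int main(void)
  {
          struct pkt_queue q = { 0 };

          if (init_req(&q, 1))
                  drain(&q);      /* no crash even though init failed      */
          printf("drained safely\n");
          return 0;
  }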
Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20230620135519.9365-2-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Acked-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 159 +++++++++++++++++++++++++------------ 1 file changed, 108 insertions(+), 51 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index a569b111a9d2..28e379c108bc 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -183,13 +183,63 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, atomic_set(&qp->skb_out, 0); } +static int rxe_init_sq(struct rxe_qp *qp, struct ib_qp_init_attr *init, + struct ib_udata *udata, + struct rxe_create_qp_resp __user *uresp) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + int wqe_size; + int err; + + qp->sq.max_wr = init->cap.max_send_wr; + wqe_size = max_t(int, init->cap.max_send_sge * sizeof(struct ib_sge), + init->cap.max_inline_data); + qp->sq.max_sge = wqe_size / sizeof(struct ib_sge); + qp->sq.max_inline = wqe_size; + wqe_size += sizeof(struct rxe_send_wqe); + + qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr, wqe_size, + QUEUE_TYPE_FROM_CLIENT); + if (!qp->sq.queue) { + rxe_err_qp(qp, "Unable to allocate send queue"); + err = -ENOMEM; + goto err_out; + } + + /* prepare info for caller to mmap send queue if user space qp */ + err = do_mmap_info(rxe, uresp ? &uresp->sq_mi : NULL, udata, + qp->sq.queue->buf, qp->sq.queue->buf_size, + &qp->sq.queue->ip); + if (err) { + rxe_err_qp(qp, "do_mmap_info failed, err = %d", err); + goto err_free; + } + + /* return actual capabilities to caller which may be larger + * than requested + */ + init->cap.max_send_wr = qp->sq.max_wr; + init->cap.max_send_sge = qp->sq.max_sge; + init->cap.max_inline_data = qp->sq.max_inline; + + return 0; + +err_free: + vfree(qp->sq.queue->buf); + kfree(qp->sq.queue); + qp->sq.queue = NULL; +err_out: + return err; +} + static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, struct ib_qp_init_attr *init, struct ib_udata *udata, struct rxe_create_qp_resp __user *uresp) { int err; - int wqe_size; - enum queue_type type; + + /* if we don't finish qp create make sure queue is valid */ + skb_queue_head_init(&qp->req_pkts); err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk); if (err < 0) @@ -204,32 +254,10 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, * (0xc000 - 0xffff). */ qp->src_port = RXE_ROCE_V2_SPORT + (hash_32(qp_num(qp), 14) & 0x3fff); - qp->sq.max_wr = init->cap.max_send_wr; - - /* These caps are limited by rxe_qp_chk_cap() done by the caller */ - wqe_size = max_t(int, init->cap.max_send_sge * sizeof(struct ib_sge), - init->cap.max_inline_data); - qp->sq.max_sge = init->cap.max_send_sge = - wqe_size / sizeof(struct ib_sge); - qp->sq.max_inline = init->cap.max_inline_data = wqe_size; - wqe_size += sizeof(struct rxe_send_wqe); - type = QUEUE_TYPE_FROM_CLIENT; - qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr, - wqe_size, type); - if (!qp->sq.queue) - return -ENOMEM; - - err = do_mmap_info(rxe, uresp ? 
&uresp->sq_mi : NULL, udata, - qp->sq.queue->buf, qp->sq.queue->buf_size, - &qp->sq.queue->ip); - - if (err) { - vfree(qp->sq.queue->buf); - kfree(qp->sq.queue); - qp->sq.queue = NULL; + err = rxe_init_sq(qp, init, udata, uresp); + if (err) return err; - } qp->req.wqe_index = queue_get_producer(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT); @@ -248,36 +276,65 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, return 0; } +static int rxe_init_rq(struct rxe_qp *qp, struct ib_qp_init_attr *init, + struct ib_udata *udata, + struct rxe_create_qp_resp __user *uresp) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + int wqe_size; + int err; + + qp->rq.max_wr = init->cap.max_recv_wr; + qp->rq.max_sge = init->cap.max_recv_sge; + wqe_size = sizeof(struct rxe_recv_wqe) + + qp->rq.max_sge*sizeof(struct ib_sge); + + qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, wqe_size, + QUEUE_TYPE_FROM_CLIENT); + if (!qp->rq.queue) { + rxe_err_qp(qp, "Unable to allocate recv queue"); + err = -ENOMEM; + goto err_out; + } + + /* prepare info for caller to mmap recv queue if user space qp */ + err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, udata, + qp->rq.queue->buf, qp->rq.queue->buf_size, + &qp->rq.queue->ip); + if (err) { + rxe_err_qp(qp, "do_mmap_info failed, err = %d", err); + goto err_free; + } + + /* return actual capabilities to caller which may be larger + * than requested + */ + init->cap.max_recv_wr = qp->rq.max_wr; + + return 0; + +err_free: + vfree(qp->rq.queue->buf); + kfree(qp->rq.queue); + qp->rq.queue = NULL; +err_out: + return err; +} + static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, struct ib_qp_init_attr *init, struct ib_udata *udata, struct rxe_create_qp_resp __user *uresp) { int err; - int wqe_size; - enum queue_type type; + + /* if we don't finish qp create make sure queue is valid */ + skb_queue_head_init(&qp->resp_pkts); if (!qp->srq) { - qp->rq.max_wr = init->cap.max_recv_wr; - qp->rq.max_sge = init->cap.max_recv_sge; - - wqe_size = rcv_wqe_size(qp->rq.max_sge); - - type = QUEUE_TYPE_FROM_CLIENT; - qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, - wqe_size, type); - if (!qp->rq.queue) - return -ENOMEM; - - err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, udata, - qp->rq.queue->buf, qp->rq.queue->buf_size, - &qp->rq.queue->ip); - if (err) { - vfree(qp->rq.queue->buf); - kfree(qp->rq.queue); - qp->rq.queue = NULL; + err = rxe_init_rq(qp, init, udata, uresp); + if (err) return err; - } } rxe_init_task(&qp->resp.task, qp, rxe_responder); @@ -307,10 +364,10 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, if (srq) rxe_get(srq); - qp->pd = pd; - qp->rcq = rcq; - qp->scq = scq; - qp->srq = srq; + qp->pd = pd; + qp->rcq = rcq; + qp->scq = scq; + qp->srq = srq; atomic_inc(&rcq->num_wq); atomic_inc(&scq->num_wq); -- cgit From 5993b75d0bc71cd2b441d174b028fc36180f032c Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Tue, 20 Jun 2023 08:55:21 -0500 Subject: RDMA/rxe: Fix unsafe drain work queue code If create_qp does not fully succeed it is possible for qp cleanup code to attempt to drain the send or recv work queues before the queues have been created causing a seg fault. This patch checks to see if the queues exist before attempting to drain them. 
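The hardening idea generalizes beyond rxe: when an object is built in several stages, every teardown step has to tolerate the stages that never ran. The sketch below is a minimal userspace illustration of that pattern, with invented structure and function names; it is not the rxe code itself.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical object created in several stages. */
struct qp {
        int *send_queue;        /* created in stage 2 */
        int *recv_queue;        /* created in stage 3 */
};

static void flush_queue(const char *name, int *q)
{
        /* queue never got created - nothing to do */
        if (!q) {
                printf("%s: not created, skipping drain\n", name);
                return;
        }
        printf("%s: draining\n", name);
        free(q);
}

/* Cleanup must tolerate whichever stage creation failed at. */
static void destroy_qp(struct qp *qp)
{
        flush_queue("send queue", qp->send_queue);
        flush_queue("recv queue", qp->recv_queue);
        free(qp);
}

int main(void)
{
        struct qp *qp = calloc(1, sizeof(*qp));

        if (!qp)
                return 1;
        qp->send_queue = calloc(16, sizeof(int));
        /* pretend the recv queue allocation failed: recv_queue stays NULL */
        destroy_qp(qp);         /* safe even though creation did not finish */
        return 0;
}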
Link: https://lore.kernel.org/r/20230620135519.9365-3-rpearsonhpe@gmail.com Reported-by: syzbot+2da1965168e7dbcba136@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-rdma/00000000000012d89205fe7cfe00@google.com/raw Fixes: 49dc9c1f0c7e ("RDMA/rxe: Cleanup reset state handling in rxe_resp.c") Fixes: fbdeb828a21f ("RDMA/rxe: Cleanup error state handling in rxe_comp.c") Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_comp.c | 4 ++++ drivers/infiniband/sw/rxe/rxe_resp.c | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 5111735aafae..d0bdc2d8adc8 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -597,6 +597,10 @@ static void flush_send_queue(struct rxe_qp *qp, bool notify) struct rxe_queue *q = qp->sq.queue; int err; + /* send queue never got created. nothing to do. */ + if (!qp->sq.queue) + return; + while ((wqe = queue_head(q, q->type))) { if (notify) { err = flush_send_wqe(qp, wqe); diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 64c64f5f36a8..da470a925efc 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -1469,6 +1469,10 @@ static void flush_recv_queue(struct rxe_qp *qp, bool notify) return; } + /* recv queue not created. nothing to do. */ + if (!qp->rq.queue) + return; + while ((wqe = queue_head(q, q->type))) { if (notify) { err = flush_recv_wqe(qp, wqe); -- cgit From cc28f351155def8db209647f2e20a59a7080825b Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Tue, 20 Jun 2023 09:01:43 -0500 Subject: RDMA/rxe: Fix rxe_modify_srq This patch corrects an error in rxe_modify_srq where if the caller changes the srq size the actual new value is not returned to the caller since it may be larger than what is requested. Additionally it open codes the subroutine rcv_wqe_size() which adds very little value, and makes some whitespace changes. 
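Two details matter in this fix: the receive WQE size is simply the fixed header plus max_sge scatter/gather entries, and a queue resize may end up with more slots than were asked for, so the actual depth has to be written back into the attribute the caller sees. The userspace sketch below mirrors that bookkeeping; it assumes, as is typical for index-masked ring buffers, that the depth is rounded up to the next power of two, and the structure sizes are made up.

#include <stdio.h>

/* Illustrative stand-ins; the real layouts live in rxe_verbs.h and rdma/ib_verbs.h. */
struct ib_sge       { unsigned long long addr; unsigned int length; unsigned int lkey; };
struct rxe_recv_wqe { char hdr[32]; };  /* hypothetical header size */

/* Round a requested depth up to the next power of two, the usual choice for
 * ring buffers indexed with a mask. The exact rxe rule is an assumption here. */
static unsigned int roundup_pow_of_two(unsigned int n)
{
        unsigned int p = 1;

        while (p < n)
                p <<= 1;
        return p;
}

int main(void)
{
        unsigned int requested_wr = 100;        /* caller's attr.max_wr */
        unsigned int max_sge = 3;

        unsigned int wqe_size = sizeof(struct rxe_recv_wqe) +
                                max_sge * sizeof(struct ib_sge);
        unsigned int actual_wr = roundup_pow_of_two(requested_wr);

        /* The point of the fix: report the real capacity back to the caller
         * instead of leaving the requested (smaller) value in place. */
        printf("recv WQE size : %u bytes\n", wqe_size);
        printf("requested WRs : %u\n", requested_wr);
        printf("actual WRs    : %u (must be returned in attr.max_wr)\n", actual_wr);
        return 0;
}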
Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20230620140142.9452-1-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_loc.h | 6 ---- drivers/infiniband/sw/rxe/rxe_srq.c | 60 ++++++++++++++++++++++--------------- 2 files changed, 36 insertions(+), 30 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 666e06a82bc9..4d2a8ef52c85 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -136,12 +136,6 @@ static inline int qp_mtu(struct rxe_qp *qp) return IB_MTU_4096; } -static inline int rcv_wqe_size(int max_sge) -{ - return sizeof(struct rxe_recv_wqe) + - max_sge * sizeof(struct ib_sge); -} - void free_rd_atomic_resource(struct resp_res *res); static inline void rxe_advance_resp_resource(struct rxe_qp *qp) diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index 27ca82ec0826..3661cb627d28 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -45,40 +45,41 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_init_attr *init, struct ib_udata *udata, struct rxe_create_srq_resp __user *uresp) { - int err; - int srq_wqe_size; struct rxe_queue *q; - enum queue_type type; + int wqe_size; + int err; - srq->ibsrq.event_handler = init->event_handler; - srq->ibsrq.srq_context = init->srq_context; - srq->limit = init->attr.srq_limit; - srq->srq_num = srq->elem.index; - srq->rq.max_wr = init->attr.max_wr; - srq->rq.max_sge = init->attr.max_sge; + srq->ibsrq.event_handler = init->event_handler; + srq->ibsrq.srq_context = init->srq_context; + srq->limit = init->attr.srq_limit; + srq->srq_num = srq->elem.index; + srq->rq.max_wr = init->attr.max_wr; + srq->rq.max_sge = init->attr.max_sge; - srq_wqe_size = rcv_wqe_size(srq->rq.max_sge); + wqe_size = sizeof(struct rxe_recv_wqe) + + srq->rq.max_sge*sizeof(struct ib_sge); spin_lock_init(&srq->rq.producer_lock); spin_lock_init(&srq->rq.consumer_lock); - type = QUEUE_TYPE_FROM_CLIENT; - q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type); + q = rxe_queue_init(rxe, &srq->rq.max_wr, wqe_size, + QUEUE_TYPE_FROM_CLIENT); if (!q) { rxe_dbg_srq(srq, "Unable to allocate queue\n"); - return -ENOMEM; + err = -ENOMEM; + goto err_out; } - srq->rq.queue = q; - err = do_mmap_info(rxe, uresp ? 
&uresp->mi : NULL, udata, q->buf, q->buf_size, &q->ip); if (err) { - vfree(q->buf); - kfree(q); - return err; + rxe_dbg_srq(srq, "Unable to init mmap info for caller\n"); + goto err_free; } + srq->rq.queue = q; + init->attr.max_wr = srq->rq.max_wr; + if (uresp) { if (copy_to_user(&uresp->srq_num, &srq->srq_num, sizeof(uresp->srq_num))) { @@ -88,6 +89,12 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, } return 0; + +err_free: + vfree(q->buf); + kfree(q); +err_out: + return err; } int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, @@ -145,9 +152,10 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata) { - int err; struct rxe_queue *q = srq->rq.queue; struct mminfo __user *mi = NULL; + int wqe_size; + int err; if (mask & IB_SRQ_MAX_WR) { /* @@ -156,12 +164,16 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, */ mi = u64_to_user_ptr(ucmd->mmap_info_addr); - err = rxe_queue_resize(q, &attr->max_wr, - rcv_wqe_size(srq->rq.max_sge), udata, mi, - &srq->rq.producer_lock, + wqe_size = sizeof(struct rxe_recv_wqe) + + srq->rq.max_sge*sizeof(struct ib_sge); + + err = rxe_queue_resize(q, &attr->max_wr, wqe_size, + udata, mi, &srq->rq.producer_lock, &srq->rq.consumer_lock); if (err) - goto err2; + goto err_free; + + srq->rq.max_wr = attr->max_wr; } if (mask & IB_SRQ_LIMIT) @@ -169,7 +181,7 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, return 0; -err2: +err_free: rxe_queue_cleanup(q); srq->rq.queue = NULL; return err; -- cgit From 5d122db2ff80cd2aed4dcd630befb56b51ddf947 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Fri, 21 Jul 2023 15:07:49 -0500 Subject: RDMA/rxe: Fix incomplete state save in rxe_requester If a send packet is dropped by the IP layer in rxe_requester() the call to rxe_xmit_packet() can fail with err == -EAGAIN. To recover, the state of the wqe is restored to the state before the packet was sent so it can be resent. However, the routines that save and restore the state miss a significnt part of the variable state in the wqe, the dma struct which is used to process through the sge table. And, the state is not saved before the packet is built which modifies the dma struct. Under heavy stress testing with many QPs on a fast node sending large messages to a slow node dropped packets are observed and the resent packets are corrupted because the dma struct was not restored. This patch fixes this behavior and allows the test cases to succeed. 
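The save/rollback pattern described above is a common way to make a fallible transmit step retryable: snapshot every field the step mutates before running it, and restore the snapshot when the failure is transient. A compact userspace sketch of the idea follows; the structure, the fake transmit function and the -EAGAIN handling are illustrative stand-ins, not the rxe requester itself.

#include <stdio.h>
#include <errno.h>

/* Hypothetical WQE state touched while building and sending a packet. */
struct wqe_state {
        int state;
        unsigned int first_psn;
        unsigned int last_psn;
        unsigned int dma_cur_sge;       /* the piece the original code forgot */
};

static int attempts;

/* Pretend transmit: fails once with -EAGAIN, then succeeds. */
static int xmit(struct wqe_state *w)
{
        w->dma_cur_sge++;               /* building the packet advances the DMA cursor */
        w->last_psn++;
        return attempts++ ? 0 : -EAGAIN;
}

int main(void)
{
        struct wqe_state wqe = { .state = 1, .first_psn = 10, .last_psn = 10 };
        struct wqe_state rollback;
        int err;

        do {
                rollback = wqe;                 /* save *before* the packet is built */
                err = xmit(&wqe);
                if (err == -EAGAIN)
                        wqe = rollback;         /* undo everything the attempt changed */
        } while (err == -EAGAIN);

        printf("sent ok: dma_cur_sge=%u last_psn=%u (no double-advance)\n",
               wqe.dma_cur_sge, wqe.last_psn);
        return 0;
}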
Fixes: 3050b9985024 ("IB/rxe: Fix race condition between requester and completer") Link: https://lore.kernel.org/r/20230721200748.4604-1-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_req.c | 45 ++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 2171f19494bc..d8c41fd626a9 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -578,10 +578,11 @@ static void save_state(struct rxe_send_wqe *wqe, struct rxe_send_wqe *rollback_wqe, u32 *rollback_psn) { - rollback_wqe->state = wqe->state; + rollback_wqe->state = wqe->state; rollback_wqe->first_psn = wqe->first_psn; - rollback_wqe->last_psn = wqe->last_psn; - *rollback_psn = qp->req.psn; + rollback_wqe->last_psn = wqe->last_psn; + rollback_wqe->dma = wqe->dma; + *rollback_psn = qp->req.psn; } static void rollback_state(struct rxe_send_wqe *wqe, @@ -589,10 +590,11 @@ static void rollback_state(struct rxe_send_wqe *wqe, struct rxe_send_wqe *rollback_wqe, u32 rollback_psn) { - wqe->state = rollback_wqe->state; + wqe->state = rollback_wqe->state; wqe->first_psn = rollback_wqe->first_psn; - wqe->last_psn = rollback_wqe->last_psn; - qp->req.psn = rollback_psn; + wqe->last_psn = rollback_wqe->last_psn; + wqe->dma = rollback_wqe->dma; + qp->req.psn = rollback_psn; } static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt) @@ -797,6 +799,9 @@ int rxe_requester(struct rxe_qp *qp) pkt.mask = rxe_opcode[opcode].mask; pkt.wqe = wqe; + /* save wqe state before we build and send packet */ + save_state(wqe, qp, &rollback_wqe, &rollback_psn); + av = rxe_get_av(&pkt, &ah); if (unlikely(!av)) { rxe_dbg_qp(qp, "Failed no address vector\n"); @@ -829,29 +834,29 @@ int rxe_requester(struct rxe_qp *qp) if (ah) rxe_put(ah); - /* - * To prevent a race on wqe access between requester and completer, - * wqe members state and psn need to be set before calling - * rxe_xmit_packet(). - * Otherwise, completer might initiate an unjustified retry flow. - */ - save_state(wqe, qp, &rollback_wqe, &rollback_psn); + /* update wqe state as though we had sent it */ update_wqe_state(qp, wqe, &pkt); update_wqe_psn(qp, wqe, &pkt, payload); err = rxe_xmit_packet(qp, &pkt, skb); if (err) { - qp->need_req_skb = 1; + if (err != -EAGAIN) { + wqe->status = IB_WC_LOC_QP_OP_ERR; + goto err; + } + /* the packet was dropped so reset wqe to the state + * before we sent it so we can try to resend + */ rollback_state(wqe, qp, &rollback_wqe, rollback_psn); - if (err == -EAGAIN) { - rxe_sched_task(&qp->req.task); - goto exit; - } + /* force a delay until the dropped packet is freed and + * the send queue is drained below the low water mark + */ + qp->need_req_skb = 1; - wqe->status = IB_WC_LOC_QP_OP_ERR; - goto err; + rxe_sched_task(&qp->req.task); + goto exit; } update_state(qp, &pkt); -- cgit From 2897f1925be9a3fad3972660ca4bb0909cd64f35 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Mon, 31 Jul 2023 21:59:16 +0800 Subject: RDMA/hns: Remove unused function declarations commit b16f8188472e ("RDMA/hns: Refactor eq code for hip06") left behind hns_roce_cleanup_eq_table(). commit 773f841ab1ae ("RDMA/hns: Avoid filling sgid index when modifying QP to RTR") leave hns_get_gid_index() unused. Remove both. 
Link: https://lore.kernel.org/r/20230731135916.32392-1-yuehaibing@huawei.com Signed-off-by: Yue Haibing Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 6084c1649000..34e099efaae3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1111,7 +1111,6 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev); void hns_roce_init_srq_table(struct hns_roce_dev *hr_dev); void hns_roce_init_xrcd_table(struct hns_roce_dev *hr_dev); -void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev); @@ -1205,7 +1204,6 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp); void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type); -u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u32 port, int gid_index); void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); -- cgit From 38313c6d2a02c28162e06753b01bd885caf9386d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 2 Aug 2023 08:46:26 -0600 Subject: RDMA/irdma: Replace one-element array with flexible-array member One-element and zero-length arrays are deprecated. So, replace one-element array in struct irdma_qvlist_info with flexible-array member. A patch for this was sent a while ago[1]. However, it seems that, at the time, the changes were partially folded[2][3], and the actual flexible-array transformation was omitted. This patch fixes that. The only binary difference seen before/after changes is shown below: | drivers/infiniband/hw/irdma/hw.o | @@ -868,7 +868,7 @@ | drivers/infiniband/hw/irdma/hw.c:484 (discriminator 2) | size += struct_size(iw_qvlist, qv_info, rf->msix_count); | 55b: imul $0x45c,%rdi,%rdi |- 562: add $0x10,%rdi |+ 562: add $0x4,%rdi which is, of course, expected as it reflects the mistake made while folding the patch I've mentioned above. Worth mentioning is the fact that with this change we save 12 bytes of memory, as can be inferred from the diff snapshot above. Notice that: $ pahole -C rdma_qv_info idrivers/infiniband/hw/irdma/hw.o struct irdma_qv_info { u32 v_idx; /* 0 4 */ u16 ceq_idx; /* 4 2 */ u16 aeq_idx; /* 6 2 */ u8 itr_idx; /* 8 1 */ /* size: 12, cachelines: 1, members: 4 */ /* padding: 3 */ /* last cacheline: 12 bytes */ }; Link: https://lore.kernel.org/linux-hardening/20210525230038.GA175516@embeddedor/ [1] Link: https://lore.kernel.org/linux-hardening/bf46b428deef4e9e89b0ea1704b1f0e5@intel.com/ [2] Link: https://lore.kernel.org/linux-rdma/20210520143809.819-1-shiraz.saleem@intel.com/T/#u [3] Fixes: 44d9e52977a1 ("RDMA/irdma: Implement device initialization definitions") Signed-off-by: Gustavo A. R. 
Silva Link: https://lore.kernel.org/r/ZMpsQrZadBaJGkt4@work Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index ad2239aabbc5..282cd492fe9b 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -239,7 +239,7 @@ struct irdma_qv_info { struct irdma_qvlist_info { u32 num_vectors; - struct irdma_qv_info qv_info[1]; + struct irdma_qv_info qv_info[]; }; struct irdma_gen_ops { -- cgit From f19fba1f79dc1fb298de7dcbaae9f6299381aeea Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 3 Aug 2023 01:45:21 -0700 Subject: RDMA/bnxt_re: Fix max_qp count for virtual functions Driver has not accounted QP1 for virtual functions when fetching device attributes and hence max_qp count is one less than active_qp count. Fixed driver so that it counts QP1 for virtual functions as well while fetching device attributes Fixes: ccd9d0d3dffc ("RDMA/bnxt_re: Enable RoCE on virtual functions") Signed-off-by: Saravanan Vajravel Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1691052326-32143-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/main.c | 6 ++---- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 7 +++---- drivers/infiniband/hw/bnxt_re/qplib_sp.h | 2 +- 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index d658e6798bbf..bbda694e9a76 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1152,8 +1152,7 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) /* Configure and allocate resources for qplib */ rdev->qplib_res.rcfw = &rdev->rcfw; - rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr, - rdev->is_virtfn); + rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr); if (rc) goto fail; @@ -1531,8 +1530,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) rdev->pacing.dbr_pacing = false; } } - rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr, - rdev->is_virtfn); + rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr); if (rc) goto disable_rcfw; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 7e57faab4f78..b77928ac0217 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -89,7 +89,7 @@ static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw, } int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, - struct bnxt_qplib_dev_attr *attr, bool vf) + struct bnxt_qplib_dev_attr *attr) { struct creq_query_func_resp resp = {}; struct bnxt_qplib_cmdqmsg msg = {}; @@ -121,9 +121,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, /* Extract the context from the side buffer */ attr->max_qp = le32_to_cpu(sb->max_qp); - /* max_qp value reported by FW for PF doesn't include the QP1 for PF */ - if (!vf) - attr->max_qp += 1; + /* max_qp value reported by FW doesn't include the QP1 */ + attr->max_qp += 1; attr->max_qp_rd_atom = sb->max_qp_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ? 
BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_rd_atom; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 264ef3cedc45..d33c78b96217 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -322,7 +322,7 @@ int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct bnxt_qplib_gid *gid, u16 gid_idx, const u8 *smac); int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, - struct bnxt_qplib_dev_attr *attr, bool vf); + struct bnxt_qplib_dev_attr *attr); int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res, struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_ctx *ctx); -- cgit From fd28c8a8c7a10e7b53851129c6d8dc5945108fe9 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 3 Aug 2023 01:45:22 -0700 Subject: RDMA/bnxt_re: Remove a redundant flag After the cited commit, BNXT_RE_FLAG_GOT_MSIX is redundant. Remove it. Fixes: 303432211324 ("bnxt_en: Remove runtime interrupt vector allocation") Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1691052326-32143-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 1 - drivers/infiniband/hw/bnxt_re/main.c | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 03a13258b140..9fd9849ebdd1 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -140,7 +140,6 @@ struct bnxt_re_dev { struct list_head list; unsigned long flags; #define BNXT_RE_FLAG_NETDEV_REGISTERED 0 -#define BNXT_RE_FLAG_GOT_MSIX 2 #define BNXT_RE_FLAG_HAVE_L2_REF 3 #define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4 #define BNXT_RE_FLAG_QOS_WORK_REG 5 diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index bbda694e9a76..2fe47b39bf74 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1422,8 +1422,8 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev) bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type); bnxt_qplib_free_rcfw_channel(&rdev->rcfw); } - if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) - rdev->num_msix = 0; + + rdev->num_msix = 0; if (rdev->pacing.dbr_pacing) bnxt_re_deinitialize_dbr_pacing(rdev); @@ -1480,7 +1480,6 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n", rdev->en_dev->ulp_tbl->msix_requested); rdev->num_msix = rdev->en_dev->ulp_tbl->msix_requested; - set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags); bnxt_re_query_hwrm_intf_version(rdev); -- cgit From c9f3e4e1d862f67d42720fdb680f4d7da64d7cc5 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 3 Aug 2023 01:45:23 -0700 Subject: RDMA/bnxt_re: Fix the sideband buffer size handling for FW commands bnxt_qplib_rcfw_alloc_sbuf allocates 24 bytes and it is better to fit on stack variables. This way we can avoid unwanted kmalloc call. Call dma_alloc_coherent directly instead of wrapper bnxt_qplib_rcfw_alloc_sbuf. Also, FW expects the side buffer needs to be aligned to BNXT_QPLIB_CMDQE_UNITS(16B). So align the size to have the extra padding bytes. 
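Two details in this change are easy to get wrong: the side-buffer size must be rounded up to the 16-byte command-queue element (BNXT_QPLIB_CMDQE_UNITS), and resp_size is then expressed in those units rather than in bytes. The short userspace sketch below reproduces the arithmetic; the 24-byte example size is illustrative.

#include <stdio.h>

#define BNXT_QPLIB_CMDQE_UNITS  16      /* FW side buffers are 16-byte aligned */

/* Same rounding the kernel ALIGN() macro performs for power-of-two alignments. */
#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        unsigned int sb_size = 24;      /* e.g. sizeof(*sb) for a small response */

        unsigned int sbuf_size = ALIGN_UP(sb_size, BNXT_QPLIB_CMDQE_UNITS);
        unsigned int resp_size = sbuf_size / BNXT_QPLIB_CMDQE_UNITS;

        /* 24 bytes round up to 32, i.e. two 16-byte units for the request. */
        printf("sizeof(*sb)=%u -> sbuf.size=%u -> req.resp_size=%u units\n",
               sb_size, sbuf_size, resp_size);
        return 0;
}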
Signed-off-by: Kashyap Desai Signed-off-by: Hongguang Gao Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1691052326-32143-4-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 38 ++++++++++-------- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 34 +--------------- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 63 +++++++++++++++--------------- 3 files changed, 55 insertions(+), 80 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index f9dee0d2da9c..282e34e8739e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -709,7 +709,7 @@ int bnxt_qplib_query_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct creq_query_srq_resp resp = {}; struct bnxt_qplib_cmdqmsg msg = {}; - struct bnxt_qplib_rcfw_sbuf *sbuf; + struct bnxt_qplib_rcfw_sbuf sbuf; struct creq_query_srq_resp_sb *sb; struct cmdq_query_srq req = {}; int rc = 0; @@ -719,17 +719,20 @@ int bnxt_qplib_query_srq(struct bnxt_qplib_res *res, sizeof(req)); /* Configure the request */ - sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); - if (!sbuf) + sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS); + sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size, + &sbuf.dma_addr, GFP_KERNEL); + if (!sbuf.sb) return -ENOMEM; - req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS; + req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS; req.srq_cid = cpu_to_le32(srq->id); - sb = sbuf->sb; - bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, sbuf, sizeof(req), + sb = sbuf.sb; + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req), sizeof(resp), 0); rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); srq->threshold = le16_to_cpu(sb->srq_limit); - bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); + dma_free_coherent(&rcfw->pdev->dev, sbuf.size, + sbuf.sb, sbuf.dma_addr); return rc; } @@ -1347,24 +1350,26 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct creq_query_qp_resp resp = {}; struct bnxt_qplib_cmdqmsg msg = {}; - struct bnxt_qplib_rcfw_sbuf *sbuf; + struct bnxt_qplib_rcfw_sbuf sbuf; struct creq_query_qp_resp_sb *sb; struct cmdq_query_qp req = {}; u32 temp32[4]; int i, rc = 0; + sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS); + sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size, + &sbuf.dma_addr, GFP_KERNEL); + if (!sbuf.sb) + return -ENOMEM; + sb = sbuf.sb; + bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_QUERY_QP, sizeof(req)); - sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); - if (!sbuf) - return -ENOMEM; - sb = sbuf->sb; - req.qp_cid = cpu_to_le32(qp->id); - req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS; - bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, sbuf, sizeof(req), + req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS; + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req), sizeof(resp), 0); rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); if (rc) @@ -1423,7 +1428,8 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) memcpy(qp->smac, sb->src_mac, 6); qp->vlan_id = le16_to_cpu(sb->vlan_pcp_vlan_dei_vlan_id); bail: - bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); + dma_free_coherent(&rcfw->pdev->dev, sbuf.size, + sbuf.sb, sbuf.dma_addr); return rc; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c 
index b30e66b64827..9d26871af296 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -335,7 +335,8 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, cpu_to_le64(sbuf->dma_addr)); __set_cmdq_base_resp_size(msg->req, msg->req_sz, ALIGN(sbuf->size, - BNXT_QPLIB_CMDQE_UNITS)); + BNXT_QPLIB_CMDQE_UNITS) / + BNXT_QPLIB_CMDQE_UNITS); } preq = (u8 *)msg->req; @@ -1196,34 +1197,3 @@ int bnxt_qplib_enable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw, return 0; } - -struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf( - struct bnxt_qplib_rcfw *rcfw, - u32 size) -{ - struct bnxt_qplib_rcfw_sbuf *sbuf; - - sbuf = kzalloc(sizeof(*sbuf), GFP_KERNEL); - if (!sbuf) - return NULL; - - sbuf->size = size; - sbuf->sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf->size, - &sbuf->dma_addr, GFP_KERNEL); - if (!sbuf->sb) - goto bail; - - return sbuf; -bail: - kfree(sbuf); - return NULL; -} - -void bnxt_qplib_rcfw_free_sbuf(struct bnxt_qplib_rcfw *rcfw, - struct bnxt_qplib_rcfw_sbuf *sbuf) -{ - if (sbuf->sb) - dma_free_coherent(&rcfw->pdev->dev, sbuf->size, - sbuf->sb, sbuf->dma_addr); - kfree(sbuf); -} diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index b77928ac0217..05ee8fdb44ad 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -94,7 +94,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, struct creq_query_func_resp resp = {}; struct bnxt_qplib_cmdqmsg msg = {}; struct creq_query_func_resp_sb *sb; - struct bnxt_qplib_rcfw_sbuf *sbuf; + struct bnxt_qplib_rcfw_sbuf sbuf; struct cmdq_query_func req = {}; u8 *tqm_alloc; int i, rc = 0; @@ -104,16 +104,14 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, CMDQ_BASE_OPCODE_QUERY_FUNC, sizeof(req)); - sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); - if (!sbuf) { - dev_err(&rcfw->pdev->dev, - "SP: QUERY_FUNC alloc side buffer failed\n"); + sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS); + sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size, + &sbuf.dma_addr, GFP_KERNEL); + if (!sbuf.sb) return -ENOMEM; - } - - sb = sbuf->sb; - req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS; - bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, sbuf, sizeof(req), + sb = sbuf.sb; + req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS; + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req), sizeof(resp), 0); rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); if (rc) @@ -174,7 +172,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->is_atomic = bnxt_qplib_is_atomic_cap(rcfw); bail: - bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); + dma_free_coherent(&rcfw->pdev->dev, sbuf.size, + sbuf.sb, sbuf.dma_addr); return rc; } @@ -717,23 +716,22 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw, struct creq_query_roce_stats_resp_sb *sb; struct cmdq_query_roce_stats req = {}; struct bnxt_qplib_cmdqmsg msg = {}; - struct bnxt_qplib_rcfw_sbuf *sbuf; + struct bnxt_qplib_rcfw_sbuf sbuf; int rc = 0; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_QUERY_ROCE_STATS, sizeof(req)); - sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); - if (!sbuf) { - dev_err(&rcfw->pdev->dev, - "SP: QUERY_ROCE_STATS alloc side buffer failed\n"); + sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS); + sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size, + &sbuf.dma_addr, GFP_KERNEL); + if (!sbuf.sb) return -ENOMEM; - } + sb = sbuf.sb; - sb = sbuf->sb; - req.resp_size = 
sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS; - bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, sbuf, sizeof(req), + req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS; + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req), sizeof(resp), 0); rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); if (rc) @@ -789,7 +787,8 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw, } bail: - bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); + dma_free_coherent(&rcfw->pdev->dev, sbuf.size, + sbuf.sb, sbuf.dma_addr); return rc; } @@ -800,32 +799,31 @@ int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid, struct creq_query_roce_stats_ext_resp_sb *sb; struct cmdq_query_roce_stats_ext req = {}; struct bnxt_qplib_cmdqmsg msg = {}; - struct bnxt_qplib_rcfw_sbuf *sbuf; + struct bnxt_qplib_rcfw_sbuf sbuf; int rc; - sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); - if (!sbuf) { - dev_err(&rcfw->pdev->dev, - "SP: QUERY_ROCE_STATS_EXT alloc sb failed"); + sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS); + sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size, + &sbuf.dma_addr, GFP_KERNEL); + if (!sbuf.sb) return -ENOMEM; - } + sb = sbuf.sb; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_QUERY_ROCE_STATS, sizeof(req)); - req.resp_size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS); - req.resp_addr = cpu_to_le64(sbuf->dma_addr); + req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS; + req.resp_addr = cpu_to_le64(sbuf.dma_addr); req.function_id = cpu_to_le32(fid); req.flags = cpu_to_le16(CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_FUNCTION_ID); - bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, sbuf, sizeof(req), + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req), sizeof(resp), 0); rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); if (rc) goto bail; - sb = sbuf->sb; estat->tx_atomic_req = le64_to_cpu(sb->tx_atomic_req_pkts); estat->tx_read_req = le64_to_cpu(sb->tx_read_req_pkts); estat->tx_read_res = le64_to_cpu(sb->tx_read_res_pkts); @@ -849,7 +847,8 @@ int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid, estat->rx_ecn_marked = le64_to_cpu(sb->rx_ecn_marked_pkts); bail: - bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); + dma_free_coherent(&rcfw->pdev->dev, sbuf.size, + sbuf.sb, sbuf.dma_addr); return rc; } -- cgit From e59a5cec3f8ac79dcc1ed45af5975e529fbcde18 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 3 Aug 2023 01:45:24 -0700 Subject: RDMA/bnxt_re: Cleanup bnxt_re_process_raw_qp_pkt_rx() function - Remove unnecessary memset by initializing the variables during declaration itself. - Arranged variable declarartion in RCT order. 
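Both cleanups lean on a standard C idiom: an initializer of = {} zeroes every member of the object at its declaration, which makes a later memset() redundant, and the declarations are then ordered longest line first ("reverse Christmas tree"). A tiny self-contained example of the idiom, with hypothetical structures:

#include <stdio.h>
#include <string.h>

/* Hypothetical work-request style structures, for illustration only. */
struct sge   { unsigned long long addr; unsigned int length; unsigned int lkey; };
struct ud_wr { int opcode; unsigned int remote_qpn; struct sge sg_list[2]; };

int main(void)
{
        /* Zero-initialized at the declaration, so no memset() is needed,
         * and the longer declarations come first. */
        struct ud_wr udwr = {};         /* every member starts out zero */
        struct sge s_sge[2] = {};       /* same for the whole array */
        int rc = 0;

        /* The older, more verbose equivalent that the patch removes: */
        struct ud_wr old_udwr;

        memset(&old_udwr, 0, sizeof(old_udwr));

        printf("udwr.remote_qpn=%u s_sge[1].lkey=%u old.opcode=%d rc=%d\n",
               udwr.remote_qpn, s_sge[1].lkey, old_udwr.opcode, rc);
        return 0;
}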
Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1691052326-32143-5-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index c46fd2a47c95..1ce3922d9c97 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -3333,26 +3333,21 @@ static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *gsi_qp, struct bnxt_re_dev *rdev = gsi_qp->rdev; struct bnxt_re_sqp_entries *sqp_entry = NULL; struct bnxt_re_qp *gsi_sqp = rdev->gsi_ctx.gsi_sqp; + dma_addr_t shrq_hdr_buf_map; + struct ib_sge s_sge[2] = {}; + struct ib_sge r_sge[2] = {}; struct bnxt_re_ah *gsi_sah; + struct ib_recv_wr rwr = {}; + dma_addr_t rq_hdr_buf_map; + struct ib_ud_wr udwr = {}; struct ib_send_wr *swr; - struct ib_ud_wr udwr; - struct ib_recv_wr rwr; + u32 skip_bytes = 0; int pkt_type = 0; - u32 tbl_idx; void *rq_hdr_buf; - dma_addr_t rq_hdr_buf_map; - dma_addr_t shrq_hdr_buf_map; u32 offset = 0; - u32 skip_bytes = 0; - struct ib_sge s_sge[2]; - struct ib_sge r_sge[2]; + u32 tbl_idx; int rc; - memset(&udwr, 0, sizeof(udwr)); - memset(&rwr, 0, sizeof(rwr)); - memset(&s_sge, 0, sizeof(s_sge)); - memset(&r_sge, 0, sizeof(r_sge)); - swr = &udwr.wr; tbl_idx = cqe->wr_id; -- cgit From 00d0427fd8ce034fb7f5257253806b2a8a0843e7 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 3 Aug 2023 01:45:25 -0700 Subject: RDMA/bnxt_re: Avoid unnecessary memset Avoid memset by initializing the variables during declaration itself. Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1691052326-32143-6-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 1 - drivers/infiniband/hw/bnxt_re/main.c | 12 ++++-------- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 1ce3922d9c97..003a07c69bc5 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2797,7 +2797,6 @@ static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev, struct bnxt_qplib_swqe wqe; int rc = 0; - memset(&wqe, 0, sizeof(wqe)); while (wr) { /* House keeping */ memset(&wqe, 0, sizeof(wqe)); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 2fe47b39bf74..c7286945feca 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -395,10 +395,9 @@ static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len, struct bnxt_en_dev *en_dev = rdev->en_dev; struct hwrm_func_qcfg_output resp = {0}; struct hwrm_func_qcfg_input req = {0}; - struct bnxt_fw_msg fw_msg; + struct bnxt_fw_msg fw_msg = {}; int rc; - memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCFG); req.fid = cpu_to_le16(0xffff); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, @@ -969,7 +968,7 @@ static int bnxt_re_handle_unaffi_async_event(struct creq_func_event static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event, struct bnxt_re_qp *qp) { - struct ib_event event; + struct ib_event event = {}; unsigned int flags; if (qp->qplib_qp.state == 
CMDQ_MODIFY_QP_NEW_STATE_ERR && @@ -979,7 +978,6 @@ static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event, bnxt_re_unlock_cqs(qp, flags); } - memset(&event, 0, sizeof(event)); if (qp->qplib_qp.srq) { event.device = &qp->rdev->ibdev; event.element.qp = &qp->ib_qp; @@ -1299,11 +1297,10 @@ static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev) { u32 prio_map = 0, tmp_map = 0; struct net_device *netdev; - struct dcb_app app; + struct dcb_app app = {}; netdev = rdev->netdev; - memset(&app, 0, sizeof(app)); app.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE; app.protocol = ETH_P_IBOE; tmp_map = dcb_ieee_getapp_mask(netdev, &app); @@ -1445,15 +1442,14 @@ static void bnxt_re_worker(struct work_struct *work) static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) { + struct bnxt_re_ring_attr rattr = {}; struct bnxt_qplib_creq_ctx *creq; - struct bnxt_re_ring_attr rattr; u32 db_offt; int vid; u8 type; int rc; /* Registered a new RoCE device instance to netdev */ - memset(&rattr, 0, sizeof(rattr)); rc = bnxt_re_register_netdev(rdev); if (rc) { ibdev_err(&rdev->ibdev, -- cgit From 14611b9b984125b2da5966d0725fdd89f6564c45 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 3 Aug 2023 01:45:26 -0700 Subject: RDMA/bnxt_re: Remove unnecessary variable initializations Remove unnecessary variable initializations. Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1691052326-32143-7-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 14 +++++++------- drivers/infiniband/hw/bnxt_re/main.c | 12 ++++++------ drivers/infiniband/hw/bnxt_re/qplib_fp.c | 6 +++--- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 2 +- drivers/infiniband/hw/bnxt_re/qplib_res.c | 8 ++++---- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 8 ++++---- 6 files changed, 25 insertions(+), 25 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 003a07c69bc5..c0a7181247f6 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -284,7 +284,7 @@ int bnxt_re_query_gid(struct ib_device *ibdev, u32 port_num, int index, union ib_gid *gid) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); - int rc = 0; + int rc; /* Ignore port_num */ memset(gid, 0, sizeof(*gid)); @@ -684,7 +684,7 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); struct bnxt_re_dev *rdev = ah->rdev; bool block = true; - int rc = 0; + int rc; block = !(flags & RDMA_DESTROY_AH_SLEEPABLE); rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, block); @@ -834,7 +834,7 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) struct bnxt_re_qp *gsi_sqp; struct bnxt_re_ah *gsi_sah; struct bnxt_re_dev *rdev; - int rc = 0; + int rc; rdev = qp->rdev; gsi_sqp = rdev->gsi_ctx.gsi_sqp; @@ -1441,7 +1441,7 @@ static int bnxt_re_create_gsi_qp(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, { struct bnxt_re_dev *rdev; struct bnxt_qplib_qp *qplqp; - int rc = 0; + int rc; rdev = qp->rdev; qplqp = &qp->qplib_qp; @@ -1872,7 +1872,7 @@ static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev, int qp_attr_mask) { struct bnxt_re_qp *qp = rdev->gsi_ctx.gsi_sqp; - int rc = 0; + int rc; if (qp_attr_mask & IB_QP_STATE) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE; @@ -2222,7 +2222,7 @@ static int bnxt_re_build_qp1_send_v2(struct 
bnxt_re_qp *qp, u8 ip_version = 0; u16 vlan_id = 0xFFFF; void *buf; - int i, rc = 0; + int i, rc; memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr)); @@ -3586,7 +3586,7 @@ static int send_phantom_wqe(struct bnxt_re_qp *qp) { struct bnxt_qplib_qp *lib_qp = &qp->qplib_qp; unsigned long flags; - int rc = 0; + int rc; spin_lock_irqsave(&qp->sq_lock, flags); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index c7286945feca..f34ce490500f 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -360,7 +360,7 @@ static struct bnxt_ulp_ops bnxt_re_ulp_ops = { static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev; - int rc = 0; + int rc; en_dev = rdev->en_dev; @@ -1145,7 +1145,7 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) { struct bnxt_re_ring_attr rattr = {}; int num_vec_created = 0; - int rc = 0, i; + int rc, i; u8 type; /* Configure and allocate resources for qplib */ @@ -1343,7 +1343,7 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) struct hwrm_ver_get_input req = {}; struct bnxt_qplib_chip_ctx *cctx; struct bnxt_fw_msg fw_msg = {}; - int rc = 0; + int rc; bnxt_re_init_hwrm_hdr((void *)&req, HWRM_VER_GET); req.hwrm_intf_maj = HWRM_VERSION_MAJOR; @@ -1373,7 +1373,7 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) static int bnxt_re_ib_init(struct bnxt_re_dev *rdev) { - int rc = 0; + int rc; u32 event; /* Register ib dev */ @@ -1613,7 +1613,7 @@ static int bnxt_re_add_device(struct auxiliary_device *adev, u8 wqe_mode) container_of(adev, struct bnxt_aux_priv, aux_dev); struct bnxt_en_dev *en_dev; struct bnxt_re_dev *rdev; - int rc = 0; + int rc; /* en_dev should never be NULL as long as adev and aux_dev are valid. 
*/ en_dev = aux_priv->edev; @@ -1859,7 +1859,7 @@ static struct auxiliary_driver bnxt_re_driver = { static int __init bnxt_re_mod_init(void) { - int rc = 0; + int rc; pr_info("%s: %s", ROCE_DRV_MODULE_NAME, version); rc = auxiliary_driver_register(&bnxt_re_driver); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 282e34e8739e..db9890e14ae9 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -517,7 +517,7 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, cqn_handler_t cqn_handler, srqn_handler_t srqn_handler) { - int rc = -1; + int rc; nq->pdev = pdev; nq->cqn_handler = cqn_handler; @@ -712,7 +712,7 @@ int bnxt_qplib_query_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_rcfw_sbuf sbuf; struct creq_query_srq_resp_sb *sb; struct cmdq_query_srq req = {}; - int rc = 0; + int rc; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_QUERY_SRQ, @@ -1354,7 +1354,7 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) struct creq_query_qp_resp_sb *sb; struct cmdq_query_qp req = {}; u32 temp32[4]; - int i, rc = 0; + int i, rc; sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS); sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 9d26871af296..287117ec50ee 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -488,7 +488,7 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_crsqe *crsqe; unsigned long flags; u16 cookie; - int rc = 0; + int rc; u8 opcode; opcode = __get_cmdq_base_opcode(msg->req, msg->req_sz); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index d47764c38461..6f1e8b721ad0 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -385,7 +385,7 @@ static int bnxt_qplib_alloc_tqm_rings(struct bnxt_qplib_res *res, struct bnxt_qplib_hwq_attr hwq_attr = {}; struct bnxt_qplib_sg_info sginfo = {}; struct bnxt_qplib_tqm_ctx *tqmctx; - int rc = 0; + int rc; int i; tqmctx = &ctx->tqm_ctx; @@ -463,7 +463,7 @@ static void bnxt_qplib_map_tqm_pgtbl(struct bnxt_qplib_tqm_ctx *ctx) static int bnxt_qplib_setup_tqm_rings(struct bnxt_qplib_res *res, struct bnxt_qplib_ctx *ctx) { - int rc = 0; + int rc; rc = bnxt_qplib_alloc_tqm_rings(res, ctx); if (rc) @@ -501,7 +501,7 @@ int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res, { struct bnxt_qplib_hwq_attr hwq_attr = {}; struct bnxt_qplib_sg_info sginfo = {}; - int rc = 0; + int rc; if (virt_fn || is_p5) goto stats_alloc; @@ -876,7 +876,7 @@ int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev, struct net_device *netdev, struct bnxt_qplib_dev_attr *dev_attr) { - int rc = 0; + int rc; res->pdev = pdev; res->netdev = netdev; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 05ee8fdb44ad..a27b68515164 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -72,7 +72,7 @@ static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw, struct creq_query_version_resp resp = {}; struct bnxt_qplib_cmdqmsg msg = {}; struct cmdq_query_version req = {}; - int rc = 0; + int rc; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_QUERY_VERSION, @@ -97,7 
+97,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_rcfw_sbuf sbuf; struct cmdq_query_func req = {}; u8 *tqm_alloc; - int i, rc = 0; + int i, rc; u32 temp; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, @@ -184,7 +184,7 @@ int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res, struct creq_set_func_resources_resp resp = {}; struct cmdq_set_func_resources req = {}; struct bnxt_qplib_cmdqmsg msg = {}; - int rc = 0; + int rc; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_SET_FUNC_RESOURCES, @@ -717,7 +717,7 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw, struct cmdq_query_roce_stats req = {}; struct bnxt_qplib_cmdqmsg msg = {}; struct bnxt_qplib_rcfw_sbuf sbuf; - int rc = 0; + int rc; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_QUERY_ROCE_STATS, -- cgit From df1bcf90a66a10967a3a43510b42cb3566208011 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Fri, 4 Aug 2023 09:27:08 +0800 Subject: RDMA/hns: Fix port active speed HW supports a variety of different speed, but the current speed is fixed. The real speed should be querried from ethernet. Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20230804012711.808069-2-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 485e110ca433..9141eadf33d2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -219,6 +219,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u32 port_num, unsigned long flags; enum ib_mtu mtu; u32 port; + int ret; port = port_num - 1; @@ -231,8 +232,10 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u32 port_num, IB_PORT_BOOT_MGMT_SUP; props->max_msg_sz = HNS_ROCE_MAX_MSG_LEN; props->pkey_tbl_len = 1; - props->active_width = IB_WIDTH_4X; - props->active_speed = 1; + ret = ib_get_eth_speed(ib_dev, port_num, &props->active_speed, + &props->active_width); + if (ret) + ibdev_warn(ib_dev, "failed to get speed, ret = %d.\n", ret); spin_lock_irqsave(&hr_dev->iboe.lock, flags); -- cgit From 706efac4477cdb8be857f6322457de524acc02ff Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Fri, 4 Aug 2023 09:27:09 +0800 Subject: RDMA/hns: Fix incorrect post-send with direct wqe of wr-list Currently, direct wqe is not supported for wr-list. RoCE driver excludes direct wqe for wr-list by judging whether the number of wr is 1. For a wr-list where the second wr is a length-error atomic wr, the post-send driver handles the first wr and adds 1 to the wr number counter firstly. While handling the second wr, the driver finds out a length error and terminates the wr handle process, remaining the counter at 1. This causes the driver mistakenly judges there is only 1 wr and thus enters the direct wqe process, carrying the current length-error atomic wqe. This patch fixes the error by adding a judgement whether the current wr is a bad wr. If so, use the normal doorbell process but not direct wqe despite the wr number is 1. 
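The decision being fixed is easiest to see as a schematic post-send loop: WQEs are counted as they are built, the loop stops on the first bad WR, and the low-latency direct-WQE path must only be taken when exactly one WQE was built and no error occurred. The userspace sketch below models that control flow with invented helpers; it is not the hns post-send code.

#include <stdio.h>
#include <errno.h>

struct wr { int bad_length; struct wr *next; };

/* Pretend to build a WQE; fails for a length-error WR. */
static int build_wqe(const struct wr *wr)
{
        return wr->bad_length ? -EINVAL : 0;
}

static void post_send(const struct wr *head)
{
        unsigned int nreq = 0;
        int ret = 0;

        for (const struct wr *wr = head; wr; wr = wr->next) {
                ret = build_wqe(wr);
                if (ret)
                        break;          /* stop on the bad WR, nreq stays at 1 */
                nreq++;
        }

        /* Direct WQE is only safe when a single WQE was built *and* the loop
         * did not bail out on a later, broken WR. */
        if (nreq == 1 && !ret)
                printf("direct WQE write\n");
        else
                printf("normal doorbell (nreq=%u ret=%d)\n", nreq, ret);
}

int main(void)
{
        struct wr bad  = { .bad_length = 1, .next = NULL };
        struct wr good = { .bad_length = 0, .next = &bad };

        post_send(&good);       /* a good WR followed by a length-error WR */
        return 0;
}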
Fixes: 01584a5edcc4 ("RDMA/hns: Add support of direct wqe") Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20230804012711.808069-3-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 30451cef5376..97ff7f76fad5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -750,7 +750,8 @@ out: qp->sq.head += nreq; qp->next_sge = sge_idx; - if (nreq == 1 && (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)) + if (nreq == 1 && !ret && + (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)) write_dwqe(hr_dev, qp, wqe); else update_sq_db(hr_dev, qp); -- cgit From c9c0bd3c177d93d80968f720304087ba83fe8f74 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Fri, 4 Aug 2023 09:27:10 +0800 Subject: RDMA/hns: Fix inaccurate error label name in init instance This patch fixes inaccurate error label name in init instance. Fixes: 70f92521584f ("RDMA/hns: Use the reserved loopback QPs to free MR before destroying MPT") Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20230804012711.808069-4-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 97ff7f76fad5..eef143388f65 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6658,14 +6658,14 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) ret = hns_roce_init(hr_dev); if (ret) { dev_err(hr_dev->dev, "RoCE Engine init failed!\n"); - goto error_failed_cfg; + goto error_failed_roce_init; } if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { ret = free_mr_init(hr_dev); if (ret) { dev_err(hr_dev->dev, "failed to init free mr!\n"); - goto error_failed_roce_init; + goto error_failed_free_mr_init; } } @@ -6673,10 +6673,10 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) return 0; -error_failed_roce_init: +error_failed_free_mr_init: hns_roce_exit(hr_dev); -error_failed_cfg: +error_failed_roce_init: kfree(hr_dev->priv); error_failed_kzalloc: -- cgit From 9e03dbea2b0634b21a45946b4f8097e0dc86ebe1 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Fri, 4 Aug 2023 09:27:11 +0800 Subject: RDMA/hns: Fix CQ and QP cache affinity Currently, the affinity between QP cache and CQ cache is not considered when assigning QPN, it will affect the message rate of HW. Allocate QPN from QP cache with better CQ affinity to get better performance. 
Fixes: 71586dd20010 ("RDMA/hns: Create QP with selected QPN for bank load balance") Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20230804012711.808069-5-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_qp.c | 28 ++++++++++++++++++++++------ 2 files changed, 23 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 34e099efaae3..bfe4d84897ad 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -97,6 +97,7 @@ #define HNS_ROCE_CQ_BANK_NUM 4 #define CQ_BANKID_SHIFT 2 +#define CQ_BANKID_MASK GENMASK(1, 0) enum { SERV_TYPE_RC, diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index d855a917f4cf..cdc1c6de43a1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -170,14 +170,29 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp, } } -static u8 get_least_load_bankid_for_qp(struct hns_roce_bank *bank) +static u8 get_affinity_cq_bank(u8 qp_bank) { - u32 least_load = bank[0].inuse; + return (qp_bank >> 1) & CQ_BANKID_MASK; +} + +static u8 get_least_load_bankid_for_qp(struct ib_qp_init_attr *init_attr, + struct hns_roce_bank *bank) +{ +#define INVALID_LOAD_QPNUM 0xFFFFFFFF + struct ib_cq *scq = init_attr->send_cq; + u32 least_load = INVALID_LOAD_QPNUM; + unsigned long cqn = 0; u8 bankid = 0; u32 bankcnt; u8 i; - for (i = 1; i < HNS_ROCE_QP_BANK_NUM; i++) { + if (scq) + cqn = to_hr_cq(scq)->cqn; + + for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) { + if (scq && (get_affinity_cq_bank(i) != (cqn & CQ_BANKID_MASK))) + continue; + bankcnt = bank[i].inuse; if (bankcnt < least_load) { least_load = bankcnt; @@ -209,7 +224,8 @@ static int alloc_qpn_with_bankid(struct hns_roce_bank *bank, u8 bankid, return 0; } -static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_qp_init_attr *init_attr) { struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; unsigned long num = 0; @@ -220,7 +236,7 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) num = 1; } else { mutex_lock(&qp_table->bank_mutex); - bankid = get_least_load_bankid_for_qp(qp_table->bank); + bankid = get_least_load_bankid_for_qp(init_attr, qp_table->bank); ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid, &num); @@ -1082,7 +1098,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, goto err_buf; } - ret = alloc_qpn(hr_dev, hr_qp); + ret = alloc_qpn(hr_dev, hr_qp, init_attr); if (ret) { ibdev_err(ibdev, "failed to alloc QPN, ret = %d.\n", ret); goto err_qpn; -- cgit From 26b7d1a27167e7adf75b150755e05d2bc123ce55 Mon Sep 17 00:00:00 2001 From: Xiang Yang Date: Fri, 4 Aug 2023 10:25:25 +0800 Subject: IB/uverbs: Fix an potential error pointer dereference smatch reports the warning below: drivers/infiniband/core/uverbs_std_types_counters.c:110 ib_uverbs_handler_UVERBS_METHOD_COUNTERS_READ() error: 'uattr' dereferencing possible ERR_PTR() The return value of uattr maybe ERR_PTR(-ENOENT), fix this by checking the value of uattr before using it. 
Fixes: ebb6796bd397 ("IB/uverbs: Add read counters support") Signed-off-by: Xiang Yang Link: https://lore.kernel.org/r/20230804022525.1916766-1-xiangyang3@huawei.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_std_types_counters.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index 999da9c79866..381aa5797641 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -107,6 +107,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)( return ret; uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF); + if (IS_ERR(uattr)) + return PTR_ERR(uattr); read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64); read_attr.counters_buff = uverbs_zalloc( attrs, array_size(read_attr.ncounters, sizeof(u64))); -- cgit From 849b1955ade1c647234d6fadeb70377d9def01ca Mon Sep 17 00:00:00 2001 From: Ruan Jinjie Date: Fri, 4 Aug 2023 16:21:01 +0800 Subject: RDMA: Remove unnecessary NULL values The NULL initialization of the pointers assigned by kzalloc() first is not necessary, because if the kzalloc() failed, the pointers will be assigned NULL, otherwise it works as usual. so remove it. Signed-off-by: Ruan Jinjie Link: https://lore.kernel.org/r/20230804082102.3361961-1-ruanjinjie@huawei.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/iwpm_util.c | 2 +- drivers/infiniband/hw/irdma/verbs.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 358a2db38d23..eecb369898f5 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -307,7 +307,7 @@ get_remote_info_exit: struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq, u8 nl_client, gfp_t gfp) { - struct iwpm_nlmsg_request *nlmsg_request = NULL; + struct iwpm_nlmsg_request *nlmsg_request; unsigned long flags; nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp); diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 0ca5b88d82e8..660be7f13060 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2865,8 +2865,8 @@ static struct irdma_mr *irdma_alloc_iwmr(struct ib_umem *region, enum irdma_memreg_type reg_type) { struct irdma_device *iwdev = to_iwdev(pd->device); - struct irdma_pbl *iwpbl = NULL; - struct irdma_mr *iwmr = NULL; + struct irdma_pbl *iwpbl; + struct irdma_mr *iwmr; unsigned long pgsz_bitmap; iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL); -- cgit From 64917f4c35b3e490e0c0f966fab533dfb560db5e Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Fri, 4 Aug 2023 17:05:28 +0200 Subject: RDMA: Make all 'class' structures const Now that the driver core allows for struct class to be in read-only memory, making all 'class' structures to be declared at build time placing them into read-only memory, instead of having to be dynamically allocated at load time. Cc: Jason Gunthorpe Cc: Leon Romanovsky Cc: Dennis Dalessandro Cc: "Md. 
Haris Iqbal" Cc: Jack Wang Cc: Greg Kroah-Hartman Cc: Yishai Hadas Cc: Ivan Orlov Cc: Benjamin Tissoires Suggested-by: Greg Kroah-Hartman Signed-off-by: Ivan Orlov Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/2023080427-commuting-crewless-cbee@gregkh Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_main.c | 35 +++++++------- drivers/infiniband/hw/hfi1/device.c | 72 +++++++++++++--------------- drivers/infiniband/hw/qib/qib_file_ops.c | 17 ++++--- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 19 ++++---- drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 2 +- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 15 +++--- drivers/infiniband/ulp/rtrs/rtrs-srv.h | 2 +- 7 files changed, 79 insertions(+), 83 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 7c9c79c13941..bf800f8cb3e4 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -72,12 +72,23 @@ enum { #define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) static dev_t dynamic_uverbs_dev; -static struct class *uverbs_class; static DEFINE_IDA(uverbs_ida); static int ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); +static char *uverbs_devnode(const struct device *dev, umode_t *mode) +{ + if (mode) + *mode = 0666; + return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); +} + +static const struct class uverbs_class = { + .name = "infiniband_verbs", + .devnode = uverbs_devnode, +}; + /* * Must be called with the ufile->device->disassociate_srcu held, and the lock * must be held until use of the ucontext is finished. @@ -1117,7 +1128,7 @@ static int ib_uverbs_add_one(struct ib_device *device) } device_initialize(&uverbs_dev->dev); - uverbs_dev->dev.class = uverbs_class; + uverbs_dev->dev.class = &uverbs_class; uverbs_dev->dev.parent = device->dev.parent; uverbs_dev->dev.release = ib_uverbs_release_dev; uverbs_dev->groups[0] = &dev_attr_group; @@ -1235,13 +1246,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) put_device(&uverbs_dev->dev); } -static char *uverbs_devnode(const struct device *dev, umode_t *mode) -{ - if (mode) - *mode = 0666; - return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); -} - static int __init ib_uverbs_init(void) { int ret; @@ -1262,16 +1266,13 @@ static int __init ib_uverbs_init(void) goto out_alloc; } - uverbs_class = class_create("infiniband_verbs"); - if (IS_ERR(uverbs_class)) { - ret = PTR_ERR(uverbs_class); + ret = class_register(&uverbs_class); + if (ret) { pr_err("user_verbs: couldn't create class infiniband_verbs\n"); goto out_chrdev; } - uverbs_class->devnode = uverbs_devnode; - - ret = class_create_file(uverbs_class, &class_attr_abi_version.attr); + ret = class_create_file(&uverbs_class, &class_attr_abi_version.attr); if (ret) { pr_err("user_verbs: couldn't create abi_version attribute\n"); goto out_class; @@ -1286,7 +1287,7 @@ static int __init ib_uverbs_init(void) return 0; out_class: - class_destroy(uverbs_class); + class_unregister(&uverbs_class); out_chrdev: unregister_chrdev_region(dynamic_uverbs_dev, @@ -1303,7 +1304,7 @@ out: static void __exit ib_uverbs_cleanup(void) { ib_unregister_client(&uverbs_client); - class_destroy(uverbs_class); + class_unregister(&uverbs_class); unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_NUM_FIXED_MINOR); unregister_chrdev_region(dynamic_uverbs_dev, diff --git 
a/drivers/infiniband/hw/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c index 05be0d119f79..b0a00b7aaec5 100644 --- a/drivers/infiniband/hw/hfi1/device.c +++ b/drivers/infiniband/hw/hfi1/device.c @@ -10,8 +10,29 @@ #include "hfi.h" #include "device.h" -static struct class *class; -static struct class *user_class; +static char *hfi1_devnode(const struct device *dev, umode_t *mode) +{ + if (mode) + *mode = 0600; + return kasprintf(GFP_KERNEL, "%s", dev_name(dev)); +} + +static const struct class class = { + .name = "hfi1", + .devnode = hfi1_devnode, +}; + +static char *hfi1_user_devnode(const struct device *dev, umode_t *mode) +{ + if (mode) + *mode = 0666; + return kasprintf(GFP_KERNEL, "%s", dev_name(dev)); +} + +static const struct class user_class = { + .name = "hfi1_user", + .devnode = hfi1_user_devnode, +}; static dev_t hfi1_dev; int hfi1_cdev_init(int minor, const char *name, @@ -37,9 +58,9 @@ int hfi1_cdev_init(int minor, const char *name, } if (user_accessible) - device = device_create(user_class, NULL, dev, NULL, "%s", name); + device = device_create(&user_class, NULL, dev, NULL, "%s", name); else - device = device_create(class, NULL, dev, NULL, "%s", name); + device = device_create(&class, NULL, dev, NULL, "%s", name); if (IS_ERR(device)) { ret = PTR_ERR(device); @@ -72,26 +93,6 @@ const char *class_name(void) return hfi1_class_name; } -static char *hfi1_devnode(const struct device *dev, umode_t *mode) -{ - if (mode) - *mode = 0600; - return kasprintf(GFP_KERNEL, "%s", dev_name(dev)); -} - -static const char *hfi1_class_name_user = "hfi1_user"; -static const char *class_name_user(void) -{ - return hfi1_class_name_user; -} - -static char *hfi1_user_devnode(const struct device *dev, umode_t *mode) -{ - if (mode) - *mode = 0666; - return kasprintf(GFP_KERNEL, "%s", dev_name(dev)); -} - int __init dev_init(void) { int ret; @@ -102,27 +103,21 @@ int __init dev_init(void) goto done; } - class = class_create(class_name()); - if (IS_ERR(class)) { - ret = PTR_ERR(class); + ret = class_register(&class); + if (ret) { pr_err("Could not create device class (err %d)\n", -ret); unregister_chrdev_region(hfi1_dev, HFI1_NMINORS); goto done; } - class->devnode = hfi1_devnode; - user_class = class_create(class_name_user()); - if (IS_ERR(user_class)) { - ret = PTR_ERR(user_class); + ret = class_register(&user_class); + if (ret) { pr_err("Could not create device class for user accessible files (err %d)\n", -ret); - class_destroy(class); - class = NULL; - user_class = NULL; + class_unregister(&class); unregister_chrdev_region(hfi1_dev, HFI1_NMINORS); goto done; } - user_class->devnode = hfi1_user_devnode; done: return ret; @@ -130,11 +125,8 @@ done: void dev_cleanup(void) { - class_destroy(class); - class = NULL; - - class_destroy(user_class); - user_class = NULL; + class_unregister(&class); + class_unregister(&user_class); unregister_chrdev_region(hfi1_dev, HFI1_NMINORS); } diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index ef85bc8d9384..152952127f13 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -2250,7 +2250,9 @@ static ssize_t qib_write_iter(struct kiocb *iocb, struct iov_iter *from) return qib_user_sdma_writev(rcd, pq, iter_iov(from), from->nr_segs); } -static struct class *qib_class; +static const struct class qib_class = { + .name = "ipath", +}; static dev_t qib_dev; int qib_cdev_init(int minor, const char *name, @@ -2281,7 +2283,7 @@ int qib_cdev_init(int minor, const char *name, goto 
err_cdev; } - device = device_create(qib_class, NULL, dev, NULL, "%s", name); + device = device_create(&qib_class, NULL, dev, NULL, "%s", name); if (!IS_ERR(device)) goto done; ret = PTR_ERR(device); @@ -2325,9 +2327,8 @@ int __init qib_dev_init(void) goto done; } - qib_class = class_create("ipath"); - if (IS_ERR(qib_class)) { - ret = PTR_ERR(qib_class); + ret = class_register(&qib_class); + if (ret) { pr_err("Could not create device class (err %d)\n", -ret); unregister_chrdev_region(qib_dev, QIB_NMINORS); } @@ -2338,10 +2339,8 @@ done: void qib_dev_cleanup(void) { - if (qib_class) { - class_destroy(qib_class); - qib_class = NULL; - } + if (class_is_registered(&qib_class)) + class_unregister(&qib_class); unregister_chrdev_region(qib_dev, QIB_NMINORS); } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index b32941dd67cb..b6ee801fd0ff 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -45,7 +45,9 @@ static struct rtrs_rdma_dev_pd dev_pd = { }; static struct workqueue_struct *rtrs_wq; -static struct class *rtrs_clt_dev_class; +static const struct class rtrs_clt_dev_class = { + .name = "rtrs-client", +}; static inline bool rtrs_clt_is_connected(const struct rtrs_clt_sess *clt) { @@ -2698,7 +2700,7 @@ static struct rtrs_clt_sess *alloc_clt(const char *sessname, size_t paths_num, return ERR_PTR(-ENOMEM); } - clt->dev.class = rtrs_clt_dev_class; + clt->dev.class = &rtrs_clt_dev_class; clt->dev.release = rtrs_clt_dev_release; uuid_gen(&clt->paths_uuid); INIT_LIST_HEAD_RCU(&clt->paths_list); @@ -3151,16 +3153,17 @@ static const struct rtrs_rdma_dev_pd_ops dev_pd_ops = { static int __init rtrs_client_init(void) { - rtrs_rdma_dev_pd_init(0, &dev_pd); + int ret = 0; - rtrs_clt_dev_class = class_create("rtrs-client"); - if (IS_ERR(rtrs_clt_dev_class)) { + rtrs_rdma_dev_pd_init(0, &dev_pd); + ret = class_register(&rtrs_clt_dev_class); + if (ret) { pr_err("Failed to create rtrs-client dev class\n"); - return PTR_ERR(rtrs_clt_dev_class); + return ret; } rtrs_wq = alloc_workqueue("rtrs_client_wq", 0, 0); if (!rtrs_wq) { - class_destroy(rtrs_clt_dev_class); + class_unregister(&rtrs_clt_dev_class); return -ENOMEM; } @@ -3170,7 +3173,7 @@ static int __init rtrs_client_init(void) static void __exit rtrs_client_exit(void) { destroy_workqueue(rtrs_wq); - class_destroy(rtrs_clt_dev_class); + class_unregister(&rtrs_clt_dev_class); rtrs_rdma_dev_pd_deinit(&dev_pd); } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index 5adba0f754b6..3f305e694fe8 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -164,7 +164,7 @@ static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_path *srv_pat */ goto unlock; } - srv->dev.class = rtrs_dev_class; + srv->dev.class = &rtrs_dev_class; err = dev_set_name(&srv->dev, "%s", srv_path->s.sessname); if (err) goto unlock; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index c38901e2c8f4..75e56604e462 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -27,7 +27,9 @@ MODULE_LICENSE("GPL"); #define MAX_HDR_SIZE PAGE_SIZE static struct rtrs_rdma_dev_pd dev_pd; -struct class *rtrs_dev_class; +const struct class rtrs_dev_class = { + .name = "rtrs-server", +}; static struct rtrs_srv_ib_ctx ib_ctx; static int __read_mostly max_chunk_size = DEFAULT_MAX_CHUNK_SIZE; @@ -2253,11 +2255,10 
@@ static int __init rtrs_server_init(void) err); return err; } - rtrs_dev_class = class_create("rtrs-server"); - if (IS_ERR(rtrs_dev_class)) { - err = PTR_ERR(rtrs_dev_class); + err = class_register(&rtrs_dev_class); + if (err) goto out_err; - } + rtrs_wq = alloc_workqueue("rtrs_server_wq", 0, 0); if (!rtrs_wq) { err = -ENOMEM; @@ -2267,7 +2268,7 @@ static int __init rtrs_server_init(void) return 0; out_dev_class: - class_destroy(rtrs_dev_class); + class_unregister(&rtrs_dev_class); out_err: return err; } @@ -2275,7 +2276,7 @@ out_err: static void __exit rtrs_server_exit(void) { destroy_workqueue(rtrs_wq); - class_destroy(rtrs_dev_class); + class_unregister(&rtrs_dev_class); rtrs_rdma_dev_pd_deinit(&dev_pd); } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h index 2f8a638e36fa..5e325b82ff33 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h @@ -129,7 +129,7 @@ struct rtrs_srv_ib_ctx { int ib_dev_count; }; -extern struct class *rtrs_dev_class; +extern const struct class rtrs_dev_class; void close_path(struct rtrs_srv_path *srv_path); -- cgit From d952f54d01ec2ea5ee9d5e21f2ea3a5807b4bcbc Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Fri, 4 Aug 2023 21:04:18 +0800 Subject: RDMA/hns: Remove unused declaration hns_roce_modify_srq() Commit c7bcb13442e1 ("RDMA/hns: Add SRQ support for hip08 kernel mode") declared but never implemented this. Signed-off-by: Yue Haibing Link: https://lore.kernel.org/r/20230804130418.41728-1-yuehaibing@huawei.com Reviewed-by: Junxian Huang Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_device.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index bfe4d84897ad..9691cfdd7e3d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1160,9 +1160,6 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int hns_roce_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); -int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, - enum ib_srq_attr_mask srq_attr_mask, - struct ib_udata *udata); int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata); -- cgit From 25944c068139f6eabc1418458479c30c2f6b2f10 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 31 Jul 2023 17:21:06 +0800 Subject: RDMA/cxgb4: Set sq_sig_type correctly Replace '0' with IB_SIGNAL_REQ_WR given the sq_sig_type is either IB_SIGNAL_ALL_WR or IB_SIGNAL_REQ_WR per the below. 
enum ib_sig_type { IB_SIGNAL_ALL_WR, IB_SIGNAL_REQ_WR }; Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20230731092106.10396-1-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/cxgb4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index ffbd9a89981e..d16d8eaa1415 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2466,7 +2466,7 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->cap.max_send_sge = qhp->attr.sq_max_sges; init_attr->cap.max_recv_sge = qhp->attr.rq_max_sges; init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE; - init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0; + init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; return 0; } -- cgit From ca60fd116c7ee1a4471a8ad0fe07cdfa57f24c11 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Wed, 2 Aug 2023 02:00:23 -0700 Subject: IB/core: Add more speed parsing in ib_get_width_and_speed() When the Ethernet driver does not provide the number of lanes in the __ethtool_get_link_ksettings() response, the function ib_get_width_and_speed() does not take consideration of 50G, 100G and 200G speeds while calculating the IB width and speed. Update the width and speed for the above netdev speeds. Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1690966823-8159-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/verbs.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 25367bd6dd97..41ff5595c860 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1899,9 +1899,18 @@ static void ib_get_width_and_speed(u32 netdev_speed, u32 lanes, } else if (netdev_speed <= SPEED_40000) { *width = IB_WIDTH_4X; *speed = IB_SPEED_FDR10; - } else { + } else if (netdev_speed <= SPEED_50000) { + *width = IB_WIDTH_2X; + *speed = IB_SPEED_EDR; + } else if (netdev_speed <= SPEED_100000) { *width = IB_WIDTH_4X; *speed = IB_SPEED_EDR; + } else if (netdev_speed <= SPEED_200000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_HDR; + } else { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_NDR; } return; -- cgit From 811e0ce9e6499a0c71c359a2b7e2bd6a5ed8e58f Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Mon, 14 Aug 2023 10:00:18 -0700 Subject: RDMA/bnxt_re: Initialize mutex dbq_lock Fix the missing dbq_lock mutex initialization Fixes: 2ad4e6303a6d ("RDMA/bnxt_re: Implement doorbell pacing algorithm") Signed-off-by: Kashyap Desai Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1692032419-21680-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index f34ce490500f..061a89bbb3b0 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -920,6 +920,7 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct bnxt_aux_priv *aux_priv, rdev->id = rdev->en_dev->pdev->devfn; INIT_LIST_HEAD(&rdev->qp_list); mutex_init(&rdev->qp_lock); + mutex_init(&rdev->pacing.dbq_lock); 
atomic_set(&rdev->stats.res.qp_count, 0); atomic_set(&rdev->stats.res.cq_count, 0); atomic_set(&rdev->stats.res.srq_count, 0); -- cgit From 213d2b9bb2d6aa50f9cbc02a0eea2096899d2e75 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 14 Aug 2023 10:00:19 -0700 Subject: RDMA/bnxt_re: Protect the PD table bitmap Syncrhonization is required to avoid simultaneous allocation of the PD. Add a new mutex lock to handle allocation from the PD table. Signed-off-by: Kashyap Desai Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1692032419-21680-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 +- drivers/infiniband/hw/bnxt_re/qplib_res.c | 26 ++++++++++++++++++++------ drivers/infiniband/hw/bnxt_re/qplib_res.h | 4 +++- 3 files changed, 24 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index c0a7181247f6..b19334ce93c3 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -619,7 +619,7 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) int rc = 0; pd->rdev = rdev; - if (bnxt_qplib_alloc_pd(&rdev->qplib_res.pd_tbl, &pd->qplib_pd)) { + if (bnxt_qplib_alloc_pd(&rdev->qplib_res, &pd->qplib_pd)) { ibdev_err(&rdev->ibdev, "Failed to allocate HW PD"); rc = -ENOMEM; goto fail; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 6f1e8b721ad0..79c43c26d511 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -642,31 +642,44 @@ static void bnxt_qplib_init_sgid_tbl(struct bnxt_qplib_sgid_tbl *sgid_tbl, } /* PDs */ -int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pdt, struct bnxt_qplib_pd *pd) +int bnxt_qplib_alloc_pd(struct bnxt_qplib_res *res, struct bnxt_qplib_pd *pd) { + struct bnxt_qplib_pd_tbl *pdt = &res->pd_tbl; u32 bit_num; + int rc = 0; + mutex_lock(&res->pd_tbl_lock); bit_num = find_first_bit(pdt->tbl, pdt->max); - if (bit_num == pdt->max) - return -ENOMEM; + if (bit_num == pdt->max) { + rc = -ENOMEM; + goto exit; + } /* Found unused PD */ clear_bit(bit_num, pdt->tbl); pd->id = bit_num; - return 0; +exit: + mutex_unlock(&res->pd_tbl_lock); + return rc; } int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res, struct bnxt_qplib_pd_tbl *pdt, struct bnxt_qplib_pd *pd) { + int rc = 0; + + mutex_lock(&res->pd_tbl_lock); if (test_and_set_bit(pd->id, pdt->tbl)) { dev_warn(&res->pdev->dev, "Freeing an unused PD? 
pdn = %d\n", pd->id); - return -EINVAL; + rc = -EINVAL; + goto exit; } pd->id = 0; - return 0; +exit: + mutex_unlock(&res->pd_tbl_lock); + return rc; } static void bnxt_qplib_free_pd_tbl(struct bnxt_qplib_pd_tbl *pdt) @@ -691,6 +704,7 @@ static int bnxt_qplib_alloc_pd_tbl(struct bnxt_qplib_res *res, pdt->max = max; memset((u8 *)pdt->tbl, 0xFF, bytes); + mutex_init(&res->pd_tbl_lock); return 0; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 57161d303c25..5949f004f785 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -277,6 +277,8 @@ struct bnxt_qplib_res { struct net_device *netdev; struct bnxt_qplib_rcfw *rcfw; struct bnxt_qplib_pd_tbl pd_tbl; + /* To protect the pd table bit map */ + struct mutex pd_tbl_lock; struct bnxt_qplib_sgid_tbl sgid_tbl; struct bnxt_qplib_dpi_tbl dpi_tbl; /* To protect the dpi table bit map */ @@ -368,7 +370,7 @@ void bnxt_qplib_free_hwq(struct bnxt_qplib_res *res, struct bnxt_qplib_hwq *hwq); int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, struct bnxt_qplib_hwq_attr *hwq_attr); -int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pd_tbl, +int bnxt_qplib_alloc_pd(struct bnxt_qplib_res *res, struct bnxt_qplib_pd *pd); int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res, struct bnxt_qplib_pd_tbl *pd_tbl, -- cgit From 0a30e59f22b207f2ed415daa44cfc0533adc329e Mon Sep 17 00:00:00 2001 From: Saravanan Vajravel Date: Mon, 31 Jul 2023 01:01:13 -0700 Subject: RDMA/bnxt_re: Add support for dmabuf pinned memory regions Support the new verb which indicates dmabuf support. bnxt doesn't support ODP. So use the pinned version of the dmabuf APIs to enable bnxt_re devices to work as dmabuf importer. Link: https://lore.kernel.org/r/1690790473-25850-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Saravanan Vajravel Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 83 ++++++++++++++++++++++---------- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 4 ++ drivers/infiniband/hw/bnxt_re/main.c | 1 + 3 files changed, 62 insertions(+), 26 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index b19334ce93c3..34c806a62269 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -3974,16 +3974,13 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw) return rc; } -/* uverbs */ -struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct ib_udata *udata) +static struct ib_mr *__bnxt_re_user_reg_mr(struct ib_pd *ib_pd, u64 length, u64 virt_addr, + int mr_access_flags, struct ib_umem *umem) { struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; - struct bnxt_re_mr *mr; - struct ib_umem *umem; unsigned long page_size; + struct bnxt_re_mr *mr; int umem_pgs, rc; u32 active_mrs; @@ -3993,6 +3990,12 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, return ERR_PTR(-ENOMEM); } + page_size = ib_umem_find_best_pgsz(umem, BNXT_RE_PAGE_SIZE_SUPPORTED, virt_addr); + if (!page_size) { + ibdev_err(&rdev->ibdev, "umem page size unsupported!"); + return ERR_PTR(-EINVAL); + } + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); @@ -4004,36 +4007,23 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, 
rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr); if (rc) { - ibdev_err(&rdev->ibdev, "Failed to allocate MR"); + ibdev_err(&rdev->ibdev, "Failed to allocate MR rc = %d", rc); + rc = -EIO; goto free_mr; } /* The fixed portion of the rkey is the same as the lkey */ mr->ib_mr.rkey = mr->qplib_mr.rkey; - - umem = ib_umem_get(&rdev->ibdev, start, length, mr_access_flags); - if (IS_ERR(umem)) { - ibdev_err(&rdev->ibdev, "Failed to get umem"); - rc = -EFAULT; - goto free_mrw; - } mr->ib_umem = umem; - mr->qplib_mr.va = virt_addr; - page_size = ib_umem_find_best_pgsz( - umem, BNXT_RE_PAGE_SIZE_SUPPORTED, virt_addr); - if (!page_size) { - ibdev_err(&rdev->ibdev, "umem page size unsupported!"); - rc = -EFAULT; - goto free_umem; - } mr->qplib_mr.total_size = length; umem_pgs = ib_umem_num_dma_blocks(umem, page_size); rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, umem, umem_pgs, page_size); if (rc) { - ibdev_err(&rdev->ibdev, "Failed to register user MR"); - goto free_umem; + ibdev_err(&rdev->ibdev, "Failed to register user MR - rc = %d\n", rc); + rc = -EIO; + goto free_mrw; } mr->ib_mr.lkey = mr->qplib_mr.lkey; @@ -4043,8 +4033,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, rdev->stats.res.mr_watermark = active_mrs; return &mr->ib_mr; -free_umem: - ib_umem_release(umem); + free_mrw: bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr); free_mr: @@ -4052,6 +4041,48 @@ free_mr: return ERR_PTR(rc); } +struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, + u64 virt_addr, int mr_access_flags, + struct ib_udata *udata) +{ + struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); + struct bnxt_re_dev *rdev = pd->rdev; + struct ib_umem *umem; + struct ib_mr *ib_mr; + + umem = ib_umem_get(&rdev->ibdev, start, length, mr_access_flags); + if (IS_ERR(umem)) + return ERR_CAST(umem); + + ib_mr = __bnxt_re_user_reg_mr(ib_pd, length, virt_addr, mr_access_flags, umem); + if (IS_ERR(ib_mr)) + ib_umem_release(umem); + return ib_mr; +} + +struct ib_mr *bnxt_re_reg_user_mr_dmabuf(struct ib_pd *ib_pd, u64 start, + u64 length, u64 virt_addr, int fd, + int mr_access_flags, struct ib_udata *udata) +{ + struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); + struct bnxt_re_dev *rdev = pd->rdev; + struct ib_umem_dmabuf *umem_dmabuf; + struct ib_umem *umem; + struct ib_mr *ib_mr; + + umem_dmabuf = ib_umem_dmabuf_get_pinned(&rdev->ibdev, start, length, + fd, mr_access_flags); + if (IS_ERR(umem_dmabuf)) + return ERR_CAST(umem_dmabuf); + + umem = &umem_dmabuf->umem; + + ib_mr = __bnxt_re_user_reg_mr(ib_pd, length, virt_addr, mr_access_flags, umem); + if (IS_ERR(ib_mr)) + ib_umem_release(umem); + return ib_mr; +} + int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) { struct ib_device *ibdev = ctx->device; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index f392a09b9e2c..84715b7e7a4e 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -229,6 +229,10 @@ int bnxt_re_dealloc_mw(struct ib_mw *mw); struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata); +struct ib_mr *bnxt_re_reg_user_mr_dmabuf(struct ib_pd *ib_pd, u64 start, + u64 length, u64 virt_addr, + int fd, int mr_access_flags, + struct ib_udata *udata); int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata); void 
bnxt_re_dealloc_ucontext(struct ib_ucontext *context); int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 061a89bbb3b0..8654ff9744a1 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -861,6 +861,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .query_qp = bnxt_re_query_qp, .query_srq = bnxt_re_query_srq, .reg_user_mr = bnxt_re_reg_user_mr, + .reg_user_mr_dmabuf = bnxt_re_reg_user_mr_dmabuf, .req_notify_cq = bnxt_re_req_notify_cq, .resize_cq = bnxt_re_resize_cq, INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah), -- cgit From 295c95aa7e0310ad8a89e98f1632d066e8526bb2 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 15 Aug 2023 19:12:09 -0500 Subject: RDMA/irdma: Drop unused kernel push code The driver has code blocks for kernel push WQEs but does not map the doorbell page rendering this mode non functional [1] Remove code associated with this feature from the kernel fast path as there is currently no plan of record to support this. This also address a sparse issue reported by lkp. drivers/infiniband/hw/irdma/uk.c:285:24: sparse: sparse: incorrect type in assignment (different base types) @@ expected bool [usertype] push_wqe:1 @@ got restricted __le32 [usertype] *push_db @@ drivers/infiniband/hw/irdma/uk.c:285:24: sparse: expected bool [usertype] push_wqe:1 drivers/infiniband/hw/irdma/uk.c:285:24: sparse: got restricted __le32 [usertype] *push_db drivers/infiniband/hw/irdma/uk.c:386:24: sparse: sparse: incorrect type in assignment (different base types) @@ expected bool [usertype] push_wqe:1 @@ got restricted __le32 [usertype] *push_db @@ [1] https://lore.kernel.org/linux-rdma/20230815051809.GB22185@unreal/T/#t Fixes: 272bba19d631 ("RDMA: Remove unnecessary ternary operators") Fixes: 551c46edc769 ("RDMA/irdma: Add user/kernel shared libraries") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202308110251.BV6BcwUR-lkp@intel.com/ Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230816001209.1721-1-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/ctrl.c | 12 +--- drivers/infiniband/hw/irdma/type.h | 1 - drivers/infiniband/hw/irdma/uk.c | 117 +++++-------------------------------- drivers/infiniband/hw/irdma/user.h | 8 --- 4 files changed, 19 insertions(+), 119 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index b90abdc85057..b1fdddd2fa1a 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -1301,7 +1301,6 @@ int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, sq_info.wr_id = info->wr_id; sq_info.signaled = info->signaled; - sq_info.push_wqe = info->push_wqe; wqe = irdma_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA, 0, &sq_info); @@ -1335,7 +1334,6 @@ int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, FIELD_PREP(IRDMAQPSQ_HPAGESIZE, page_size) | FIELD_PREP(IRDMAQPSQ_STAGRIGHTS, info->access_rights) | FIELD_PREP(IRDMAQPSQ_VABASEDTO, info->addr_type) | - FIELD_PREP(IRDMAQPSQ_PUSHWQE, (sq_info.push_wqe ? 
1 : 0)) | FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | @@ -1346,13 +1344,9 @@ int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, print_hex_dump_debug("WQE: FAST_REG WQE", DUMP_PREFIX_OFFSET, 16, 8, wqe, IRDMA_QP_WQE_MIN_SIZE, false); - if (sq_info.push_wqe) { - irdma_qp_push_wqe(&qp->qp_uk, wqe, IRDMA_QP_WQE_MIN_QUANTA, - wqe_idx, post_sq); - } else { - if (post_sq) - irdma_uk_qp_post_wr(&qp->qp_uk); - } + + if (post_sq) + irdma_uk_qp_post_wr(&qp->qp_uk); return 0; } diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 5ee68604e59f..b49a98c208bf 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -1015,7 +1015,6 @@ struct irdma_fast_reg_stag_info { bool local_fence:1; bool read_fence:1; bool signaled:1; - bool push_wqe:1; bool use_hmc_fcn_index:1; u8 hmc_fcn_index; bool use_pf_rid:1; diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index 6f9238c4fe20..9f84d997e3ab 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -127,10 +127,7 @@ void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp) hw_sq_tail = (u32)FIELD_GET(IRDMA_QP_DBSA_HW_SQ_TAIL, temp); sw_sq_head = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); if (sw_sq_head != qp->initial_ring.head) { - if (qp->push_dropped) { - writel(qp->qp_id, qp->wqe_alloc_db); - qp->push_dropped = false; - } else if (sw_sq_head != hw_sq_tail) { + if (sw_sq_head != hw_sq_tail) { if (sw_sq_head > qp->initial_ring.head) { if (hw_sq_tail >= qp->initial_ring.head && hw_sq_tail < sw_sq_head) @@ -146,38 +143,6 @@ void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp) qp->initial_ring.head = qp->sq_ring.head; } -/** - * irdma_qp_ring_push_db - ring qp doorbell - * @qp: hw qp ptr - * @wqe_idx: wqe index - */ -static void irdma_qp_ring_push_db(struct irdma_qp_uk *qp, u32 wqe_idx) -{ - set_32bit_val(qp->push_db, 0, - FIELD_PREP(IRDMA_WQEALLOC_WQE_DESC_INDEX, wqe_idx >> 3) | qp->qp_id); - qp->initial_ring.head = qp->sq_ring.head; - qp->push_mode = true; - qp->push_dropped = false; -} - -void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 *wqe, u16 quanta, - u32 wqe_idx, bool post_sq) -{ - __le64 *push; - - if (IRDMA_RING_CURRENT_HEAD(qp->initial_ring) != - IRDMA_RING_CURRENT_TAIL(qp->sq_ring) && - !qp->push_mode) { - if (post_sq) - irdma_uk_qp_post_wr(qp); - } else { - push = (__le64 *)((uintptr_t)qp->push_wqe + - (wqe_idx & 0x7) * 0x20); - memcpy(push, wqe, quanta * IRDMA_QP_WQE_MIN_SIZE); - irdma_qp_ring_push_db(qp, wqe_idx); - } -} - /** * irdma_qp_get_next_send_wqe - pad with NOP if needed, return where next WR should go * @qp: hw qp ptr @@ -192,7 +157,6 @@ __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx, { __le64 *wqe; __le64 *wqe_0 = NULL; - u32 nop_wqe_idx; u16 avail_quanta; u16 i; @@ -209,14 +173,10 @@ __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx, IRDMA_SQ_RING_FREE_QUANTA(qp->sq_ring)) return NULL; - nop_wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); for (i = 0; i < avail_quanta; i++) { irdma_nop_1(qp); IRDMA_RING_MOVE_HEAD_NOCHECK(qp->sq_ring); } - if (qp->push_db && info->push_wqe) - irdma_qp_push_wqe(qp, qp->sq_base[nop_wqe_idx].elem, - avail_quanta, nop_wqe_idx, true); } *wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); @@ -282,8 +242,6 @@ int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool read_fence = false; u16 quanta; - 
info->push_wqe = qp->push_db; - op_info = &info->op.rdma_write; if (op_info->num_lo_sges > qp->max_sq_frag_cnt) return -EINVAL; @@ -344,7 +302,6 @@ int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt) | FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | - FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | @@ -353,12 +310,9 @@ int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, dma_wmb(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, 24, hdr); - if (info->push_wqe) { - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); - } else { - if (post_sq) - irdma_uk_qp_post_wr(qp); - } + + if (post_sq) + irdma_uk_qp_post_wr(qp); return 0; } @@ -383,8 +337,6 @@ int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, u16 quanta; u64 hdr; - info->push_wqe = qp->push_db; - op_info = &info->op.rdma_read; if (qp->max_sq_frag_cnt < op_info->num_lo_sges) return -EINVAL; @@ -431,7 +383,6 @@ int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | FIELD_PREP(IRDMAQPSQ_OPCODE, (inv_stag ? IRDMAQP_OP_RDMA_READ_LOC_INV : IRDMAQP_OP_RDMA_READ)) | - FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | @@ -440,12 +391,9 @@ int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, dma_wmb(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, 24, hdr); - if (info->push_wqe) { - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); - } else { - if (post_sq) - irdma_uk_qp_post_wr(qp); - } + + if (post_sq) + irdma_uk_qp_post_wr(qp); return 0; } @@ -468,8 +416,6 @@ int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool read_fence = false; u16 quanta; - info->push_wqe = qp->push_db; - op_info = &info->op.send; if (qp->max_sq_frag_cnt < op_info->num_sges) return -EINVAL; @@ -530,7 +476,6 @@ int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | - FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | @@ -541,12 +486,9 @@ int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, dma_wmb(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, 24, hdr); - if (info->push_wqe) { - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); - } else { - if (post_sq) - irdma_uk_qp_post_wr(qp); - } + + if (post_sq) + irdma_uk_qp_post_wr(qp); return 0; } @@ -720,7 +662,6 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, u32 i, total_size = 0; u16 quanta; - info->push_wqe = qp->push_db; op_info = &info->op.rdma_write; if (unlikely(qp->max_sq_frag_cnt < op_info->num_lo_sges)) @@ -750,7 +691,6 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt ? 
1 : 0) | FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) | FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid ? 1 : 0) | - FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe ? 1 : 0) | FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | @@ -767,12 +707,8 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, set_64bit_val(wqe, 24, hdr); - if (info->push_wqe) { - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); - } else { - if (post_sq) - irdma_uk_qp_post_wr(qp); - } + if (post_sq) + irdma_uk_qp_post_wr(qp); return 0; } @@ -794,7 +730,6 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp, u32 i, total_size = 0; u16 quanta; - info->push_wqe = qp->push_db; op_info = &info->op.send; if (unlikely(qp->max_sq_frag_cnt < op_info->num_sges)) @@ -827,7 +762,6 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp, (info->imm_data_valid ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) | - FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | @@ -845,12 +779,8 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp, set_64bit_val(wqe, 24, hdr); - if (info->push_wqe) { - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); - } else { - if (post_sq) - irdma_uk_qp_post_wr(qp); - } + if (post_sq) + irdma_uk_qp_post_wr(qp); return 0; } @@ -872,7 +802,6 @@ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, bool local_fence = false; struct ib_sge sge = {}; - info->push_wqe = qp->push_db; op_info = &info->op.inv_local_stag; local_fence = info->local_fence; @@ -889,7 +818,6 @@ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, set_64bit_val(wqe, 16, 0); hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMA_OP_TYPE_INV_STAG) | - FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | @@ -899,13 +827,8 @@ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, set_64bit_val(wqe, 24, hdr); - if (info->push_wqe) { - irdma_qp_push_wqe(qp, wqe, IRDMA_QP_WQE_MIN_QUANTA, wqe_idx, - post_sq); - } else { - if (post_sq) - irdma_uk_qp_post_wr(qp); - } + if (post_sq) + irdma_uk_qp_post_wr(qp); return 0; } @@ -1124,7 +1047,6 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, info->q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3); info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword3); - info->push_dropped = (bool)FIELD_GET(IRDMACQ_PSHDROP, qword3); info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3); if (info->error) { info->major_err = FIELD_GET(IRDMA_CQ_MAJERR, qword3); @@ -1213,11 +1135,6 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, return irdma_uk_cq_poll_cmpl(cq, info); } } - /*cease posting push mode on push drop*/ - if (info->push_dropped) { - qp->push_mode = false; - qp->push_dropped = true; - } if (info->comp_status != IRDMA_COMPL_STATUS_FLUSHED) { info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; if (!info->comp_status) @@ -1521,7 +1438,6 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) qp->wqe_alloc_db = info->wqe_alloc_db; qp->qp_id = info->qp_id; qp->sq_size = info->sq_size; - qp->push_mode = false; qp->max_sq_frag_cnt = info->max_sq_frag_cnt; sq_ring_size = qp->sq_size << info->sq_shift; IRDMA_RING_INIT(qp->sq_ring, sq_ring_size); @@ -1616,7 
+1532,6 @@ int irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq) u32 wqe_idx; struct irdma_post_sq_info info = {}; - info.push_wqe = false; info.wr_id = wr_id; wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA, 0, &info); diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index dd145ec72a91..36feca57b274 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -216,7 +216,6 @@ struct irdma_post_sq_info { bool local_fence:1; bool inline_data:1; bool imm_data_valid:1; - bool push_wqe:1; bool report_rtt:1; bool udp_hdr:1; bool defer_flag:1; @@ -248,7 +247,6 @@ struct irdma_cq_poll_info { u8 op_type; u8 q_type; bool stag_invalid_set:1; /* or L_R_Key set */ - bool push_dropped:1; bool error:1; bool solicited_event:1; bool ipv4:1; @@ -321,8 +319,6 @@ struct irdma_qp_uk { struct irdma_sq_uk_wr_trk_info *sq_wrtrk_array; u64 *rq_wrid_array; __le64 *shadow_area; - __le32 *push_db; - __le64 *push_wqe; struct irdma_ring sq_ring; struct irdma_ring rq_ring; struct irdma_ring initial_ring; @@ -342,8 +338,6 @@ struct irdma_qp_uk { u8 rq_wqe_size; u8 rq_wqe_size_multiplier; bool deferred_flag:1; - bool push_mode:1; /* whether the last post wqe was pushed */ - bool push_dropped:1; bool first_sq_wq:1; bool sq_flush_complete:1; /* Indicates flush was seen and SQ was empty after the flush */ bool rq_flush_complete:1; /* Indicates flush was seen and RQ was empty after the flush */ @@ -415,7 +409,5 @@ int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *wqdepth); int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, u32 *wqdepth); -void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 *wqe, u16 quanta, - u32 wqe_idx, bool post_sq); void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx); #endif /* IRDMA_USER_H */ -- cgit From 18ddaeb03bdb65b84fece11a8cac5bf583ae1b91 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 15 Aug 2023 14:39:53 -0600 Subject: RDMA/mlx4: Copy union directly Copy union directly instead of using memcpy(). Note that in this case, a direct assignment is more readable and consistent with the subsequent assignments. This addresses the following -Wstringop-overflow warning seen in s390 with defconfig: drivers/infiniband/hw/mlx4/main.c:296:33: warning: writing 16 bytes into a region of size 0 [-Wstringop-overflow=] 296 | memcpy(&port_gid_table->gids[free].gid, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 297 | &attr->gid, sizeof(attr->gid)); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This helps with the ongoing efforts to globally enable -Wstringop-overflow. Link: https://github.com/KSPP/linux/issues/308 Signed-off-by: Gustavo A. R. 
Silva Link: https://lore.kernel.org/r/ZNvimeRAPkJ24zRG@work Reviewed-by: Kees Cook Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx4/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 216aacd72e4f..2d2cd17e02e6 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -293,8 +293,7 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) ret = -ENOMEM; } else { *context = port_gid_table->gids[free].ctx; - memcpy(&port_gid_table->gids[free].gid, - &attr->gid, sizeof(attr->gid)); + port_gid_table->gids[free].gid = attr->gid; port_gid_table->gids[free].gid_type = attr->gid_type; port_gid_table->gids[free].vlan_id = vlan_id; port_gid_table->gids[free].ctx->real_index = free; -- cgit From 5f513c8b9740ea05a136219739a5358780c3cb5f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2023 11:13:53 +0300 Subject: RDMA/irdma: Add missing kernel-doc in irdma_setup_umode_qp() Fix the following warning reported by kbuild: drivers/infiniband/hw/irdma/verbs.c:584: warning: Function parameter or member 'udata' not described in 'irdma_setup_umode_qp' Fixes: 3a8498720450 ("RDMA/irdma: Allow accurate reporting on QP max send/recv WR") Link: https://lore.kernel.org/r/2c9bcd2b773c400a1699bd7973e22bfba1e4b379.1692260011.git.leonro@nvidia.com Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202308171620.m4MNACWz-lkp@intel.com/ Signed-off-by: Leon Romanovsky Acked-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/verbs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 660be7f13060..6cffe21558fe 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -573,6 +573,7 @@ static void irdma_setup_virt_qp(struct irdma_device *iwdev, /** * irdma_setup_umode_qp - setup sq and rq size in user mode qp + * @udata: udata * @iwdev: iwarp device * @iwqp: qp ptr (user or kernel) * @info: initialize info to return -- cgit From c4bb187379ddc45affbb5d3f895bdcb9eadd77e6 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Wed, 16 Aug 2023 17:18:10 +0800 Subject: RDMA/hns: Dump whole QP/CQ/MR resource in raw Currently, some fields in the QP/CQ/MR resource can be dumped by rdma-tool, but this information is not enough. It is very inconvenient to continue to expand on the current field, and it will also introduce some trouble to parse these raw data. This patch dump whole resource in raw to avoid the above problems. 
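For the CQ case, the fill routine after this change reduces to roughly the following (error paths trimmed; the exact hunks are in the diff below), handing the whole queried context to the rdma netlink layer instead of a hand-picked list of fields:

	struct hns_roce_v2_cq_context context;
	int ret;

	ret = hr_dev->hw->query_cqc(hr_dev, hr_cq->cqn, &context);
	if (ret)
		return -EINVAL;

	/* dump the complete CQC as raw bytes; rdma-tool parses it offline */
	return nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context);

The QP and MR paths follow the same shape with their respective context structures.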
Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20230816091812.2899366-2-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_restrack.c | 75 ++------------------------- 1 file changed, 3 insertions(+), 72 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 989a2af2e938..081a01de3055 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -9,8 +9,6 @@ #include "hns_roce_device.h" #include "hns_roce_hw_v2.h" -#define MAX_ENTRY_NUM 256 - int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq) { struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); @@ -47,8 +45,6 @@ int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq) struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); struct hns_roce_v2_cq_context context; - u32 data[MAX_ENTRY_NUM] = {}; - int offset = 0; int ret; if (!hr_dev->hw->query_cqc) @@ -58,23 +54,7 @@ int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq) if (ret) return -EINVAL; - data[offset++] = hr_reg_read(&context, CQC_CQ_ST); - data[offset++] = hr_reg_read(&context, CQC_SHIFT); - data[offset++] = hr_reg_read(&context, CQC_CQE_SIZE); - data[offset++] = hr_reg_read(&context, CQC_CQE_CNT); - data[offset++] = hr_reg_read(&context, CQC_CQ_PRODUCER_IDX); - data[offset++] = hr_reg_read(&context, CQC_CQ_CONSUMER_IDX); - data[offset++] = hr_reg_read(&context, CQC_DB_RECORD_EN); - data[offset++] = hr_reg_read(&context, CQC_ARM_ST); - data[offset++] = hr_reg_read(&context, CQC_CMD_SN); - data[offset++] = hr_reg_read(&context, CQC_CEQN); - data[offset++] = hr_reg_read(&context, CQC_CQ_MAX_CNT); - data[offset++] = hr_reg_read(&context, CQC_CQ_PERIOD); - data[offset++] = hr_reg_read(&context, CQC_CQE_HOP_NUM); - data[offset++] = hr_reg_read(&context, CQC_CQE_BAR_PG_SZ); - data[offset++] = hr_reg_read(&context, CQC_CQE_BUF_PG_SZ); - - ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data); + ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context); return ret; } @@ -118,8 +98,6 @@ int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp) struct hns_roce_dev *hr_dev = to_hr_dev(ib_qp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ib_qp); struct hns_roce_v2_qp_context context; - u32 data[MAX_ENTRY_NUM] = {}; - int offset = 0; int ret; if (!hr_dev->hw->query_qpc) @@ -129,42 +107,7 @@ int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp) if (ret) return -EINVAL; - data[offset++] = hr_reg_read(&context, QPC_QP_ST); - data[offset++] = hr_reg_read(&context, QPC_ERR_TYPE); - data[offset++] = hr_reg_read(&context, QPC_CHECK_FLG); - data[offset++] = hr_reg_read(&context, QPC_SRQ_EN); - data[offset++] = hr_reg_read(&context, QPC_SRQN); - data[offset++] = hr_reg_read(&context, QPC_QKEY_XRCD); - data[offset++] = hr_reg_read(&context, QPC_TX_CQN); - data[offset++] = hr_reg_read(&context, QPC_RX_CQN); - data[offset++] = hr_reg_read(&context, QPC_SQ_PRODUCER_IDX); - data[offset++] = hr_reg_read(&context, QPC_SQ_CONSUMER_IDX); - data[offset++] = hr_reg_read(&context, QPC_RQ_RECORD_EN); - data[offset++] = hr_reg_read(&context, QPC_RQ_PRODUCER_IDX); - data[offset++] = hr_reg_read(&context, QPC_RQ_CONSUMER_IDX); - data[offset++] = hr_reg_read(&context, QPC_SQ_SHIFT); - data[offset++] = 
hr_reg_read(&context, QPC_RQWS); - data[offset++] = hr_reg_read(&context, QPC_RQ_SHIFT); - data[offset++] = hr_reg_read(&context, QPC_SGE_SHIFT); - data[offset++] = hr_reg_read(&context, QPC_SQ_HOP_NUM); - data[offset++] = hr_reg_read(&context, QPC_RQ_HOP_NUM); - data[offset++] = hr_reg_read(&context, QPC_SGE_HOP_NUM); - data[offset++] = hr_reg_read(&context, QPC_WQE_SGE_BA_PG_SZ); - data[offset++] = hr_reg_read(&context, QPC_WQE_SGE_BUF_PG_SZ); - data[offset++] = hr_reg_read(&context, QPC_RETRY_NUM_INIT); - data[offset++] = hr_reg_read(&context, QPC_RETRY_CNT); - data[offset++] = hr_reg_read(&context, QPC_SQ_CUR_PSN); - data[offset++] = hr_reg_read(&context, QPC_SQ_MAX_PSN); - data[offset++] = hr_reg_read(&context, QPC_SQ_FLUSH_IDX); - data[offset++] = hr_reg_read(&context, QPC_SQ_MAX_IDX); - data[offset++] = hr_reg_read(&context, QPC_SQ_TX_ERR); - data[offset++] = hr_reg_read(&context, QPC_SQ_RX_ERR); - data[offset++] = hr_reg_read(&context, QPC_RQ_RX_ERR); - data[offset++] = hr_reg_read(&context, QPC_RQ_TX_ERR); - data[offset++] = hr_reg_read(&context, QPC_RQ_CQE_IDX); - data[offset++] = hr_reg_read(&context, QPC_RQ_RTY_TX_ERR); - - ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data); + ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context); return ret; } @@ -204,8 +147,6 @@ int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr) struct hns_roce_dev *hr_dev = to_hr_dev(ib_mr->device); struct hns_roce_mr *hr_mr = to_hr_mr(ib_mr); struct hns_roce_v2_mpt_entry context; - u32 data[MAX_ENTRY_NUM] = {}; - int offset = 0; int ret; if (!hr_dev->hw->query_mpt) @@ -215,17 +156,7 @@ int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr) if (ret) return -EINVAL; - data[offset++] = hr_reg_read(&context, MPT_ST); - data[offset++] = hr_reg_read(&context, MPT_PD); - data[offset++] = hr_reg_read(&context, MPT_LKEY); - data[offset++] = hr_reg_read(&context, MPT_LEN_L); - data[offset++] = hr_reg_read(&context, MPT_LEN_H); - data[offset++] = hr_reg_read(&context, MPT_PBL_SIZE); - data[offset++] = hr_reg_read(&context, MPT_PBL_HOP_NUM); - data[offset++] = hr_reg_read(&context, MPT_PBL_BA_PG_SZ); - data[offset++] = hr_reg_read(&context, MPT_PBL_BUF_PG_SZ); - - ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data); + ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context); return ret; } -- cgit From 5a87279591a15f952043209d17429fadab278d47 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Wed, 16 Aug 2023 17:18:11 +0800 Subject: RDMA/hns: Support hns HW stats Support query hns HW stats for rdma-tool to help debugging. 
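The wiring uses the standard ib core counter hooks; a condensed sketch of the two ops this patch registers (port and PCI-revision validity checks omitted here, see the full diff below):

	static struct rdma_hw_stats *
	hns_roce_alloc_hw_port_stats(struct ib_device *device, u32 port_num)
	{
		/* one rdma_stat_desc per HW counter; rdma-tool takes the names from here */
		return rdma_alloc_hw_stats_struct(hns_roce_port_stats_descs,
						  ARRAY_SIZE(hns_roce_port_stats_descs),
						  RDMA_HW_STATS_DEFAULT_LIFESPAN);
	}

	static int hns_roce_get_hw_stats(struct ib_device *device,
					 struct rdma_hw_stats *stats,
					 u32 port, int index)
	{
		struct hns_roce_dev *hr_dev = to_hr_dev(device);
		int num_counters = HNS_ROCE_HW_CNT_TOTAL;
		int ret;

		/* firmware command fills stats->value[]; returns the counter count */
		ret = hr_dev->hw->query_hw_counter(hr_dev, stats->value, port,
						   &num_counters);
		return ret ? ret : num_counters;
	}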
Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20230816091812.2899366-3-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_device.h | 28 ++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 51 +++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 1 + drivers/infiniband/hw/hns/hns_roce_main.c | 79 +++++++++++++++++++++++++++++ 4 files changed, 159 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 9691cfdd7e3d..7f0d0288beb1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -840,6 +840,32 @@ enum hns_roce_device_state { HNS_ROCE_DEVICE_STATE_UNINIT, }; +enum hns_roce_hw_pkt_stat_index { + HNS_ROCE_HW_RX_RC_PKT_CNT, + HNS_ROCE_HW_RX_UC_PKT_CNT, + HNS_ROCE_HW_RX_UD_PKT_CNT, + HNS_ROCE_HW_RX_XRC_PKT_CNT, + HNS_ROCE_HW_RX_PKT_CNT, + HNS_ROCE_HW_RX_ERR_PKT_CNT, + HNS_ROCE_HW_RX_CNP_PKT_CNT, + HNS_ROCE_HW_TX_RC_PKT_CNT, + HNS_ROCE_HW_TX_UC_PKT_CNT, + HNS_ROCE_HW_TX_UD_PKT_CNT, + HNS_ROCE_HW_TX_XRC_PKT_CNT, + HNS_ROCE_HW_TX_PKT_CNT, + HNS_ROCE_HW_TX_ERR_PKT_CNT, + HNS_ROCE_HW_TX_CNP_PKT_CNT, + HNS_ROCE_HW_TRP_GET_MPT_ERR_PKT_CNT, + HNS_ROCE_HW_TRP_GET_IRRL_ERR_PKT_CNT, + HNS_ROCE_HW_ECN_DB_CNT, + HNS_ROCE_HW_RX_BUF_CNT, + HNS_ROCE_HW_TRP_RX_SOF_CNT, + HNS_ROCE_HW_CQ_CQE_CNT, + HNS_ROCE_HW_CQ_POE_CNT, + HNS_ROCE_HW_CQ_NOTIFY_CNT, + HNS_ROCE_HW_CNT_TOTAL +}; + struct hns_roce_hw { int (*cmq_init)(struct hns_roce_dev *hr_dev); void (*cmq_exit)(struct hns_roce_dev *hr_dev); @@ -882,6 +908,8 @@ struct hns_roce_hw { int (*query_cqc)(struct hns_roce_dev *hr_dev, u32 cqn, void *buffer); int (*query_qpc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer); int (*query_mpt)(struct hns_roce_dev *hr_dev, u32 key, void *buffer); + int (*query_hw_counter)(struct hns_roce_dev *hr_dev, + u64 *stats, u32 port, int *hw_counters); const struct ib_device_ops *hns_roce_dev_ops; const struct ib_device_ops *hns_roce_dev_srq_ops; }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index eef143388f65..d82daff2d9bd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1613,6 +1613,56 @@ static int hns_roce_query_func_info(struct hns_roce_dev *hr_dev) return 0; } +static int hns_roce_hw_v2_query_counter(struct hns_roce_dev *hr_dev, + u64 *stats, u32 port, int *num_counters) +{ +#define CNT_PER_DESC 3 + struct hns_roce_cmq_desc *desc; + int bd_idx, cnt_idx; + __le64 *cnt_data; + int desc_num; + int ret; + int i; + + if (port > hr_dev->caps.num_ports) + return -EINVAL; + + desc_num = DIV_ROUND_UP(HNS_ROCE_HW_CNT_TOTAL, CNT_PER_DESC); + desc = kcalloc(desc_num, sizeof(*desc), GFP_KERNEL); + if (!desc) + return -ENOMEM; + + for (i = 0; i < desc_num; i++) { + hns_roce_cmq_setup_basic_desc(&desc[i], + HNS_ROCE_OPC_QUERY_COUNTER, true); + if (i != desc_num - 1) + desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + } + + ret = hns_roce_cmq_send(hr_dev, desc, desc_num); + if (ret) { + ibdev_err(&hr_dev->ib_dev, + "failed to get counter, ret = %d.\n", ret); + goto err_out; + } + + for (i = 0; i < HNS_ROCE_HW_CNT_TOTAL && i < *num_counters; i++) { + bd_idx = i / CNT_PER_DESC; + if (!(desc[bd_idx].flag & HNS_ROCE_CMD_FLAG_NEXT) && + bd_idx != HNS_ROCE_HW_CNT_TOTAL / CNT_PER_DESC) + break; + + cnt_data = (__le64 *)&desc[bd_idx].data[0]; + cnt_idx = i % 
CNT_PER_DESC; + stats[i] = le64_to_cpu(cnt_data[cnt_idx]); + } + *num_counters = i; + +err_out: + kfree(desc); + return ret; +} + static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev) { struct hns_roce_cmq_desc desc; @@ -6582,6 +6632,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .query_cqc = hns_roce_v2_query_cqc, .query_qpc = hns_roce_v2_query_qpc, .query_mpt = hns_roce_v2_query_mpt, + .query_hw_counter = hns_roce_hw_v2_query_counter, .hns_roce_dev_ops = &hns_roce_v2_dev_ops, .hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index d9693f6cc802..cd97cbee682a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -198,6 +198,7 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_QUERY_HW_VER = 0x8000, HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001, HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004, + HNS_ROCE_OPC_QUERY_COUNTER = 0x8206, HNS_ROCE_OPC_QUERY_PF_RES = 0x8400, HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401, HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403, diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 9141eadf33d2..d9d546cdef52 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -515,6 +515,83 @@ static void hns_roce_get_fw_ver(struct ib_device *device, char *str) sub_minor); } +#define HNS_ROCE_HW_CNT(ename, cname) \ + [HNS_ROCE_HW_##ename##_CNT].name = cname + +static const struct rdma_stat_desc hns_roce_port_stats_descs[] = { + HNS_ROCE_HW_CNT(RX_RC_PKT, "rx_rc_pkt"), + HNS_ROCE_HW_CNT(RX_UC_PKT, "rx_uc_pkt"), + HNS_ROCE_HW_CNT(RX_UD_PKT, "rx_ud_pkt"), + HNS_ROCE_HW_CNT(RX_XRC_PKT, "rx_xrc_pkt"), + HNS_ROCE_HW_CNT(RX_PKT, "rx_pkt"), + HNS_ROCE_HW_CNT(RX_ERR_PKT, "rx_err_pkt"), + HNS_ROCE_HW_CNT(RX_CNP_PKT, "rx_cnp_pkt"), + HNS_ROCE_HW_CNT(TX_RC_PKT, "tx_rc_pkt"), + HNS_ROCE_HW_CNT(TX_UC_PKT, "tx_uc_pkt"), + HNS_ROCE_HW_CNT(TX_UD_PKT, "tx_ud_pkt"), + HNS_ROCE_HW_CNT(TX_XRC_PKT, "tx_xrc_pkt"), + HNS_ROCE_HW_CNT(TX_PKT, "tx_pkt"), + HNS_ROCE_HW_CNT(TX_ERR_PKT, "tx_err_pkt"), + HNS_ROCE_HW_CNT(TX_CNP_PKT, "tx_cnp_pkt"), + HNS_ROCE_HW_CNT(TRP_GET_MPT_ERR_PKT, "trp_get_mpt_err_pkt"), + HNS_ROCE_HW_CNT(TRP_GET_IRRL_ERR_PKT, "trp_get_irrl_err_pkt"), + HNS_ROCE_HW_CNT(ECN_DB, "ecn_doorbell"), + HNS_ROCE_HW_CNT(RX_BUF, "rx_buffer"), + HNS_ROCE_HW_CNT(TRP_RX_SOF, "trp_rx_sof"), + HNS_ROCE_HW_CNT(CQ_CQE, "cq_cqe"), + HNS_ROCE_HW_CNT(CQ_POE, "cq_poe"), + HNS_ROCE_HW_CNT(CQ_NOTIFY, "cq_notify"), +}; + +static struct rdma_hw_stats *hns_roce_alloc_hw_port_stats( + struct ib_device *device, u32 port_num) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(device); + u32 port = port_num - 1; + + if (port > hr_dev->caps.num_ports) { + ibdev_err(device, "invalid port num.\n"); + return NULL; + } + + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 || + hr_dev->is_vf) + return NULL; + + return rdma_alloc_hw_stats_struct(hns_roce_port_stats_descs, + ARRAY_SIZE(hns_roce_port_stats_descs), + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int hns_roce_get_hw_stats(struct ib_device *device, + struct rdma_hw_stats *stats, + u32 port, int index) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(device); + int num_counters = HNS_ROCE_HW_CNT_TOTAL; + int ret; + + if (port == 0) + return 0; + + if (port > hr_dev->caps.num_ports) + return -EINVAL; + + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 || + hr_dev->is_vf) + return -EOPNOTSUPP; + + ret = 
hr_dev->hw->query_hw_counter(hr_dev, stats->value, port, + &num_counters); + if (ret) { + ibdev_err(device, "failed to query hw counter, ret = %d\n", + ret); + return ret; + } + + return num_counters; +} + static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev) { struct hns_roce_ib_iboe *iboe = &hr_dev->iboe; @@ -557,6 +634,8 @@ static const struct ib_device_ops hns_roce_dev_ops = { .query_pkey = hns_roce_query_pkey, .query_port = hns_roce_query_port, .reg_user_mr = hns_roce_reg_user_mr, + .alloc_hw_port_stats = hns_roce_alloc_hw_port_stats, + .get_hw_stats = hns_roce_get_hw_stats, INIT_RDMA_OBJ_SIZE(ib_ah, hns_roce_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_cq, hns_roce_cq, ib_cq), -- cgit From d7cfbba90b8015bfd76b5da69ee5debbae8b7000 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Thu, 17 Aug 2023 18:21:49 +0800 Subject: RDMA/erdma: Renaming variable names and field names of struct erdma_mem Currently, variable names and field names of struct erdma_mem contain 'mtt', which is not accurate. Renaming them with 'xxx_mem' or 'mem'. Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20230817102151.75964-2-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_verbs.c | 66 +++++++++++++++---------------- drivers/infiniband/hw/erdma/erdma_verbs.h | 8 ++-- 2 files changed, 37 insertions(+), 37 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index fe0521f1536e..fbbd046b350c 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -67,30 +67,30 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) user_qp = &qp->user_qp; req.sq_cqn_mtt_cfg = FIELD_PREP( ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, - ilog2(user_qp->sq_mtt.page_size) - ERDMA_HW_PAGE_SHIFT); + ilog2(user_qp->sq_mem.page_size) - ERDMA_HW_PAGE_SHIFT); req.sq_cqn_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn); req.rq_cqn_mtt_cfg = FIELD_PREP( ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, - ilog2(user_qp->rq_mtt.page_size) - ERDMA_HW_PAGE_SHIFT); + ilog2(user_qp->rq_mem.page_size) - ERDMA_HW_PAGE_SHIFT); req.rq_cqn_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn); - req.sq_mtt_cfg = user_qp->sq_mtt.page_offset; + req.sq_mtt_cfg = user_qp->sq_mem.page_offset; req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, - user_qp->sq_mtt.mtt_nents) | + user_qp->sq_mem.mtt_nents) | FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, - user_qp->sq_mtt.mtt_type); + user_qp->sq_mem.mtt_type); - req.rq_mtt_cfg = user_qp->rq_mtt.page_offset; + req.rq_mtt_cfg = user_qp->rq_mem.page_offset; req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, - user_qp->rq_mtt.mtt_nents) | + user_qp->rq_mem.mtt_nents) | FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, - user_qp->rq_mtt.mtt_type); + user_qp->rq_mem.mtt_type); - req.sq_buf_addr = user_qp->sq_mtt.mtt_entry[0]; - req.rq_buf_addr = user_qp->rq_mtt.mtt_entry[0]; + req.sq_buf_addr = user_qp->sq_mem.mtt_entry[0]; + req.rq_buf_addr = user_qp->rq_mem.mtt_entry[0]; req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr; req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr; @@ -161,7 +161,7 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq) { struct erdma_dev *dev = to_edev(cq->ibcq.device); struct erdma_cmdq_create_cq_req req; - struct erdma_mem *mtt; + struct erdma_mem *mem; u32 page_size; erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, @@ 
-186,23 +186,23 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq) req.cq_db_info_addr = cq->kern_cq.qbuf_dma_addr + (cq->depth << CQE_SHIFT); } else { - mtt = &cq->user_cq.qbuf_mtt; + mem = &cq->user_cq.qbuf_mem; req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK, - ilog2(mtt->page_size) - ERDMA_HW_PAGE_SHIFT); - if (mtt->mtt_nents == 1) { - req.qbuf_addr_l = lower_32_bits(*(u64 *)mtt->mtt_buf); - req.qbuf_addr_h = upper_32_bits(*(u64 *)mtt->mtt_buf); + ilog2(mem->page_size) - ERDMA_HW_PAGE_SHIFT); + if (mem->mtt_nents == 1) { + req.qbuf_addr_l = lower_32_bits(*(u64 *)mem->mtt_buf); + req.qbuf_addr_h = upper_32_bits(*(u64 *)mem->mtt_buf); } else { - req.qbuf_addr_l = lower_32_bits(mtt->mtt_entry[0]); - req.qbuf_addr_h = upper_32_bits(mtt->mtt_entry[0]); + req.qbuf_addr_l = lower_32_bits(mem->mtt_entry[0]); + req.qbuf_addr_h = upper_32_bits(mem->mtt_entry[0]); } req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, - mtt->mtt_nents); + mem->mtt_nents); req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK, - mtt->mtt_type); + mem->mtt_type); - req.first_page_offset = mtt->page_offset; + req.first_page_offset = mem->page_offset; req.cq_db_info_addr = cq->user_cq.db_info_dma_addr; if (uctx->ext_db.enable) { @@ -660,7 +660,7 @@ static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx, qp->attrs.rq_size * RQE_SIZE)) return -EINVAL; - ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mtt, va, + ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mem, va, qp->attrs.sq_size << SQEBB_SHIFT, 0, va, (SZ_1M - SZ_4K), 1); if (ret) @@ -669,7 +669,7 @@ static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx, rq_offset = ALIGN(qp->attrs.sq_size << SQEBB_SHIFT, ERDMA_HW_PAGE_SIZE); qp->user_qp.rq_offset = rq_offset; - ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mtt, va + rq_offset, + ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mem, va + rq_offset, qp->attrs.rq_size << RQE_SHIFT, 0, va + rq_offset, (SZ_1M - SZ_4K), 1); if (ret) @@ -687,18 +687,18 @@ static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx, return 0; put_rq_mtt: - put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt); + put_mtt_entries(qp->dev, &qp->user_qp.rq_mem); put_sq_mtt: - put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt); + put_mtt_entries(qp->dev, &qp->user_qp.sq_mem); return ret; } static void free_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx) { - put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt); - put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt); + put_mtt_entries(qp->dev, &qp->user_qp.sq_mem); + put_mtt_entries(qp->dev, &qp->user_qp.rq_mem); erdma_unmap_user_dbrecords(uctx, &qp->user_qp.user_dbr_page); } @@ -1041,7 +1041,7 @@ int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr); } else { erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page); - put_mtt_entries(dev, &cq->user_cq.qbuf_mtt); + put_mtt_entries(dev, &cq->user_cq.qbuf_mem); } xa_erase(&dev->cq_xa, cq->cqn); @@ -1089,8 +1089,8 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT), qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr); } else { - put_mtt_entries(dev, &qp->user_qp.sq_mtt); - put_mtt_entries(dev, &qp->user_qp.rq_mtt); + put_mtt_entries(dev, &qp->user_qp.sq_mem); + put_mtt_entries(dev, &qp->user_qp.rq_mem); erdma_unmap_user_dbrecords(ctx, &qp->user_qp.user_dbr_page); } @@ -1379,7 +1379,7 @@ static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct 
erdma_cq *cq, int ret; struct erdma_dev *dev = to_edev(cq->ibcq.device); - ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mtt, ureq->qbuf_va, + ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mem, ureq->qbuf_va, ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K, 1); if (ret) @@ -1389,7 +1389,7 @@ static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq, &cq->user_cq.user_dbr_page, &cq->user_cq.db_info_dma_addr); if (ret) - put_mtt_entries(dev, &cq->user_cq.qbuf_mtt); + put_mtt_entries(dev, &cq->user_cq.qbuf_mem); return ret; } @@ -1473,7 +1473,7 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, err_free_res: if (!rdma_is_kernel_res(&ibcq->res)) { erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page); - put_mtt_entries(dev, &cq->user_cq.qbuf_mtt); + put_mtt_entries(dev, &cq->user_cq.qbuf_mem); } else { dma_free_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(depth << CQE_SHIFT), diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index 429fc3063f98..abaf031fe0d2 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -65,7 +65,7 @@ struct erdma_pd { * MemoryRegion definition. */ #define ERDMA_MAX_INLINE_MTT_ENTRIES 4 -#define MTT_SIZE(mtt_cnt) (mtt_cnt << 3) /* per mtt takes 8 Bytes. */ +#define MTT_SIZE(mtt_cnt) (mtt_cnt << 3) /* per mtt entry takes 8 Bytes. */ #define ERDMA_MR_MAX_MTT_CNT 524288 #define ERDMA_MTT_ENTRY_SIZE 8 @@ -121,8 +121,8 @@ struct erdma_user_dbrecords_page { }; struct erdma_uqp { - struct erdma_mem sq_mtt; - struct erdma_mem rq_mtt; + struct erdma_mem sq_mem; + struct erdma_mem rq_mem; dma_addr_t sq_db_info_dma_addr; dma_addr_t rq_db_info_dma_addr; @@ -234,7 +234,7 @@ struct erdma_kcq_info { }; struct erdma_ucq_info { - struct erdma_mem qbuf_mtt; + struct erdma_mem qbuf_mem; struct erdma_user_dbrecords_page *user_dbr_page; dma_addr_t db_info_dma_addr; }; -- cgit From 7244b4aa4221d7b8d59b6ca5a97c7073b2802f10 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Thu, 17 Aug 2023 18:21:50 +0800 Subject: RDMA/erdma: Refactor the storage structure of MTT entries Currently our MTT only support inline mtt entries (0 level MTT) and indirect MTT entries (1 level mtt), which will limit the maximum length of MRs. In order to implement a multi-level MTT, we refactor the structure of MTT first. 
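The 0-level versus 1-level distinction that this refactor has to preserve can be illustrated with a small standalone sketch. It is not driver code: the command layout and type names below are simplified stand-ins for the erdma structures in the diff that follows, and only the inline threshold (ERDMA_MAX_INLINE_MTT_ENTRIES, four entries) is taken from the patch. Up to four page addresses are copied directly into the command (inline, 0-level); anything larger is referenced through the DMA address of a separately allocated entry buffer (indirect, 1-level).

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAX_INLINE_MTT_ENTRIES 4        /* mirrors ERDMA_MAX_INLINE_MTT_ENTRIES */

struct mtt {                            /* simplified stand-in for struct erdma_mtt */
        uint64_t *buf;                  /* 8-byte page-address entries */
        uint64_t buf_dma;               /* DMA address of buf, used for the indirect case */
};

struct cmd {                            /* hypothetical command layout, not the real one */
        uint64_t phy_addr[MAX_INLINE_MTT_ENTRIES];
        int level;                      /* 0 = inline entries, 1 = one indirect buffer */
};

static void assemble_cmd(struct cmd *cmd, const struct mtt *mtt, unsigned int nents)
{
        if (nents <= MAX_INLINE_MTT_ENTRIES) {
                /* 0-level: the entries travel inside the command itself */
                memcpy(cmd->phy_addr, mtt->buf, nents * sizeof(uint64_t));
                cmd->level = 0;
        } else {
                /* 1-level: the command only carries the address of the entry buffer */
                cmd->phy_addr[0] = mtt->buf_dma;
                cmd->level = 1;
        }
}

int main(void)
{
        uint64_t pages[6] = { 0x1000, 0x2000, 0x3000, 0x4000, 0x5000, 0x6000 };
        struct mtt mtt = { .buf = pages, .buf_dma = 0xabc000 };
        struct cmd cmd;

        assemble_cmd(&cmd, &mtt, 6);    /* six entries exceed the inline limit */
        printf("level=%d phy_addr[0]=%#llx\n", cmd.level,
               (unsigned long long)cmd.phy_addr[0]);
        return 0;
}

The same decision shows up for the QP, CQ and MR commands in the diff, which is why the patch moves entry storage behind struct erdma_mtt before additional levels are introduced.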
Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20230817102151.75964-3-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_hw.h | 4 +- drivers/infiniband/hw/erdma/erdma_qp.c | 2 +- drivers/infiniband/hw/erdma/erdma_verbs.c | 214 ++++++++++++++++++------------ drivers/infiniband/hw/erdma/erdma_verbs.h | 26 +++- 4 files changed, 152 insertions(+), 94 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index a882b57aa118..80a78569bc2a 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -228,7 +228,7 @@ struct erdma_cmdq_ext_db_req { /* create_cq cfg1 */ #define ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK GENMASK(31, 16) -#define ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK BIT(15) +#define ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK BIT(15) #define ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK BIT(11) #define ERDMA_CMD_CREATE_CQ_EQN_MASK GENMASK(9, 0) @@ -258,7 +258,7 @@ struct erdma_cmdq_create_cq_req { /* regmr cfg2 */ #define ERDMA_CMD_REGMR_PAGESIZE_MASK GENMASK(31, 27) -#define ERDMA_CMD_REGMR_MTT_TYPE_MASK GENMASK(21, 20) +#define ERDMA_CMD_REGMR_MTT_LEVEL_MASK GENMASK(21, 20) #define ERDMA_CMD_REGMR_MTT_CNT_MASK GENMASK(19, 0) struct erdma_cmdq_reg_mr_req { diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index 44923c51a01b..6d0330badd68 100644 --- a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -410,7 +410,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, /* Copy SGLs to SQE content to accelerate */ memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1, qp->attrs.sq_size, SQEBB_SHIFT), - mr->mem.mtt_buf, MTT_SIZE(mr->mem.mtt_nents)); + mr->mem.mtt->buf, MTT_SIZE(mr->mem.mtt_nents)); wqe_size = sizeof(struct erdma_reg_mr_sqe) + MTT_SIZE(mr->mem.mtt_nents); } else { diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index fbbd046b350c..26bb8c53683e 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -19,6 +19,23 @@ #include "erdma_cm.h" #include "erdma_verbs.h" +static void assemble_qbuf_mtt_for_cmd(struct erdma_mem *mem, u32 *cfg, + u64 *addr0, u64 *addr1) +{ + struct erdma_mtt *mtt = mem->mtt; + + if (mem->mtt_nents > ERDMA_MAX_INLINE_MTT_ENTRIES) { + *addr0 = mtt->buf_dma; + *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, + ERDMA_MR_INDIRECT_MTT); + } else { + *addr0 = mtt->buf[0]; + memcpy(addr1, mtt->buf + 1, MTT_SIZE(mem->mtt_nents - 1)); + *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, + ERDMA_MR_INLINE_MTT); + } +} + static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) { struct erdma_dev *dev = to_edev(qp->ibqp.device); @@ -79,18 +96,16 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) req.sq_mtt_cfg = user_qp->sq_mem.page_offset; req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, - user_qp->sq_mem.mtt_nents) | - FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, - user_qp->sq_mem.mtt_type); + user_qp->sq_mem.mtt_nents); req.rq_mtt_cfg = user_qp->rq_mem.page_offset; req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, - user_qp->rq_mem.mtt_nents) | - FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, - user_qp->rq_mem.mtt_type); + user_qp->rq_mem.mtt_nents); - req.sq_buf_addr = user_qp->sq_mem.mtt_entry[0]; - req.rq_buf_addr = user_qp->rq_mem.mtt_entry[0]; + 
assemble_qbuf_mtt_for_cmd(&user_qp->sq_mem, &req.sq_mtt_cfg, + &req.sq_buf_addr, req.sq_mtt_entry); + assemble_qbuf_mtt_for_cmd(&user_qp->rq_mem, &req.rq_mtt_cfg, + &req.rq_buf_addr, req.rq_mtt_entry); req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr; req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr; @@ -117,13 +132,22 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) { - struct erdma_cmdq_reg_mr_req req; struct erdma_pd *pd = to_epd(mr->ibmr.pd); - u64 *phy_addr; - int i; + struct erdma_cmdq_reg_mr_req req; + u32 mtt_level; erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR); + if (mr->type == ERDMA_MR_TYPE_FRMR || + mr->mem.page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES) { + req.phy_addr[0] = mr->mem.mtt->buf_dma; + mtt_level = ERDMA_MR_INDIRECT_MTT; + } else { + memcpy(req.phy_addr, mr->mem.mtt->buf, + MTT_SIZE(mr->mem.page_cnt)); + mtt_level = ERDMA_MR_INLINE_MTT; + } + req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) | FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, mr->ibmr.lkey & 0xFF) | FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8); @@ -132,7 +156,7 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access); req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK, ilog2(mr->mem.page_size)) | - FIELD_PREP(ERDMA_CMD_REGMR_MTT_TYPE_MASK, mr->mem.mtt_type) | + FIELD_PREP(ERDMA_CMD_REGMR_MTT_LEVEL_MASK, mtt_level) | FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK, mr->mem.page_cnt); if (mr->type == ERDMA_MR_TYPE_DMA) @@ -143,16 +167,6 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) req.size = mr->mem.len; } - if (mr->type == ERDMA_MR_TYPE_FRMR || - mr->mem.mtt_type == ERDMA_MR_INDIRECT_MTT) { - phy_addr = req.phy_addr; - *phy_addr = mr->mem.mtt_entry[0]; - } else { - phy_addr = req.phy_addr; - for (i = 0; i < mr->mem.mtt_nents; i++) - *phy_addr++ = mr->mem.mtt_entry[i]; - } - post_cmd: return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } @@ -179,7 +193,7 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq) req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr); req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) | - FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK, + FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK, ERDMA_MR_INLINE_MTT); req.first_page_offset = 0; @@ -191,16 +205,20 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq) FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK, ilog2(mem->page_size) - ERDMA_HW_PAGE_SHIFT); if (mem->mtt_nents == 1) { - req.qbuf_addr_l = lower_32_bits(*(u64 *)mem->mtt_buf); - req.qbuf_addr_h = upper_32_bits(*(u64 *)mem->mtt_buf); + req.qbuf_addr_l = lower_32_bits(mem->mtt->buf[0]); + req.qbuf_addr_h = upper_32_bits(mem->mtt->buf[0]); + req.cfg1 |= + FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK, + ERDMA_MR_INLINE_MTT); } else { - req.qbuf_addr_l = lower_32_bits(mem->mtt_entry[0]); - req.qbuf_addr_h = upper_32_bits(mem->mtt_entry[0]); + req.qbuf_addr_l = lower_32_bits(mem->mtt->buf_dma); + req.qbuf_addr_h = upper_32_bits(mem->mtt->buf_dma); + req.cfg1 |= + FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK, + ERDMA_MR_INDIRECT_MTT); } req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, mem->mtt_nents); - req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK, - mem->mtt_type); req.first_page_offset = mem->page_offset; req.cq_db_info_addr = cq->user_cq.db_info_dma_addr; @@ -508,12 +526,77 
@@ err_out: return -ENOMEM; } +static void erdma_fill_bottom_mtt(struct erdma_dev *dev, struct erdma_mem *mem) +{ + struct erdma_mtt *mtt = mem->mtt; + struct ib_block_iter biter; + u32 idx = 0; + + while (mtt->low_level) + mtt = mtt->low_level; + + rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size) + mtt->buf[idx++] = rdma_block_iter_dma_address(&biter); +} + +static struct erdma_mtt *erdma_create_cont_mtt(struct erdma_dev *dev, + size_t size) +{ + struct erdma_mtt *mtt; + int ret = -ENOMEM; + + mtt = kzalloc(sizeof(*mtt), GFP_KERNEL); + if (!mtt) + return ERR_PTR(-ENOMEM); + + mtt->size = size; + mtt->buf = kzalloc(mtt->size, GFP_KERNEL); + if (!mtt->buf) + goto err_free_mtt; + + mtt->continuous = true; + mtt->buf_dma = dma_map_single(&dev->pdev->dev, mtt->buf, mtt->size, + DMA_TO_DEVICE); + if (dma_mapping_error(&dev->pdev->dev, mtt->buf_dma)) + goto err_free_mtt_buf; + + return mtt; + +err_free_mtt_buf: + kfree(mtt->buf); + +err_free_mtt: + kfree(mtt); + + return ERR_PTR(ret); +} + +static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size, + bool force_continuous) +{ + ibdev_dbg(&dev->ibdev, "create_mtt, size:%lu, force cont:%d\n", size, + force_continuous); + + if (force_continuous) + return erdma_create_cont_mtt(dev, size); + + return ERR_PTR(-EOPNOTSUPP); +} + +static void erdma_destroy_mtt(struct erdma_dev *dev, struct erdma_mtt *mtt) +{ + if (mtt->continuous) { + dma_unmap_single(&dev->pdev->dev, mtt->buf_dma, mtt->size, + DMA_TO_DEVICE); + kfree(mtt->buf); + kfree(mtt); + } +} + static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem, u64 start, u64 len, int access, u64 virt, unsigned long req_page_size, u8 force_indirect_mtt) { - struct ib_block_iter biter; - uint64_t *phy_addr = NULL; int ret = 0; mem->umem = ib_umem_get(&dev->ibdev, start, len, access); @@ -529,38 +612,13 @@ static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem, mem->page_offset = start & (mem->page_size - 1); mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size); mem->page_cnt = mem->mtt_nents; - - if (mem->page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES || - force_indirect_mtt) { - mem->mtt_type = ERDMA_MR_INDIRECT_MTT; - mem->mtt_buf = - alloc_pages_exact(MTT_SIZE(mem->page_cnt), GFP_KERNEL); - if (!mem->mtt_buf) { - ret = -ENOMEM; - goto error_ret; - } - phy_addr = mem->mtt_buf; - } else { - mem->mtt_type = ERDMA_MR_INLINE_MTT; - phy_addr = mem->mtt_entry; - } - - rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size) { - *phy_addr = rdma_block_iter_dma_address(&biter); - phy_addr++; + mem->mtt = erdma_create_mtt(dev, MTT_SIZE(mem->page_cnt), true); + if (IS_ERR(mem->mtt)) { + ret = PTR_ERR(mem->mtt); + goto error_ret; } - if (mem->mtt_type == ERDMA_MR_INDIRECT_MTT) { - mem->mtt_entry[0] = - dma_map_single(&dev->pdev->dev, mem->mtt_buf, - MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE); - if (dma_mapping_error(&dev->pdev->dev, mem->mtt_entry[0])) { - free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt)); - mem->mtt_buf = NULL; - ret = -ENOMEM; - goto error_ret; - } - } + erdma_fill_bottom_mtt(dev, mem); return 0; @@ -575,11 +633,8 @@ error_ret: static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem) { - if (mem->mtt_buf) { - dma_unmap_single(&dev->pdev->dev, mem->mtt_entry[0], - MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE); - free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt)); - } + if (mem->mtt) + erdma_destroy_mtt(dev, mem->mtt); if (mem->umem) { ib_umem_release(mem->umem); @@ -875,33 +930,20 @@ struct ib_mr 
*erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, mr->mem.page_size = PAGE_SIZE; /* update it later. */ mr->mem.page_cnt = max_num_sg; - mr->mem.mtt_type = ERDMA_MR_INDIRECT_MTT; - mr->mem.mtt_buf = - alloc_pages_exact(MTT_SIZE(mr->mem.page_cnt), GFP_KERNEL); - if (!mr->mem.mtt_buf) { - ret = -ENOMEM; + mr->mem.mtt = erdma_create_mtt(dev, MTT_SIZE(max_num_sg), true); + if (IS_ERR(mr->mem.mtt)) { + ret = PTR_ERR(mr->mem.mtt); goto out_remove_stag; } - mr->mem.mtt_entry[0] = - dma_map_single(&dev->pdev->dev, mr->mem.mtt_buf, - MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE); - if (dma_mapping_error(&dev->pdev->dev, mr->mem.mtt_entry[0])) { - ret = -ENOMEM; - goto out_free_mtt; - } - ret = regmr_cmd(dev, mr); if (ret) - goto out_dma_unmap; + goto out_destroy_mtt; return &mr->ibmr; -out_dma_unmap: - dma_unmap_single(&dev->pdev->dev, mr->mem.mtt_entry[0], - MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE); -out_free_mtt: - free_pages_exact(mr->mem.mtt_buf, MTT_SIZE(mr->mem.page_cnt)); +out_destroy_mtt: + erdma_destroy_mtt(dev, mr->mem.mtt); out_remove_stag: erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], @@ -920,7 +962,7 @@ static int erdma_set_page(struct ib_mr *ibmr, u64 addr) if (mr->mem.mtt_nents >= mr->mem.page_cnt) return -1; - *((u64 *)mr->mem.mtt_buf + mr->mem.mtt_nents) = addr; + mr->mem.mtt->buf[mr->mem.mtt_nents] = addr; mr->mem.mtt_nents++; return 0; diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index abaf031fe0d2..5f639f27a8a9 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -65,7 +65,7 @@ struct erdma_pd { * MemoryRegion definition. */ #define ERDMA_MAX_INLINE_MTT_ENTRIES 4 -#define MTT_SIZE(mtt_cnt) (mtt_cnt << 3) /* per mtt entry takes 8 Bytes. */ +#define MTT_SIZE(mtt_cnt) ((mtt_cnt) << 3) /* per mtt entry takes 8 Bytes. */ #define ERDMA_MR_MAX_MTT_CNT 524288 #define ERDMA_MTT_ENTRY_SIZE 8 @@ -90,10 +90,28 @@ static inline u8 to_erdma_access_flags(int access) (access & IB_ACCESS_REMOTE_ATOMIC ? ERDMA_MR_ACC_RA : 0); } +/* Hierarchical storage structure for MTT entries */ +struct erdma_mtt { + u64 *buf; + size_t size; + + bool continuous; + union { + dma_addr_t buf_dma; + struct { + struct scatterlist *sglist; + u32 nsg; + u32 level; + }; + }; + + struct erdma_mtt *low_level; +}; + struct erdma_mem { struct ib_umem *umem; - void *mtt_buf; - u32 mtt_type; + struct erdma_mtt *mtt; + u32 page_size; u32 page_offset; u32 page_cnt; @@ -101,8 +119,6 @@ struct erdma_mem { u64 va; u64 len; - - u64 mtt_entry[ERDMA_MAX_INLINE_MTT_ENTRIES]; }; struct erdma_mr { -- cgit From ed10435d35831478c2a93a238b62e6699bdf4834 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Thu, 17 Aug 2023 18:21:51 +0800 Subject: RDMA/erdma: Implement hierarchical MTT Hierarchical MTT allows large MR registration without the need of continuous physical address. This commit adds the support of hierarchical MTT support for erdma. 
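A back-of-the-envelope bound makes the benefit concrete. The sketch below is standalone and assumes 4 KiB pages together with the 8-byte entries implied by ERDMA_MTT_ENTRY_SIZE; the level numbering is only illustrative and need not match the driver's internal counting. Each page-sized table holds 512 entries, so chaining up to three page-sized levels reaches on the order of 512 GiB of registered memory while no single table needs to be physically contiguous beyond one page.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        const uint64_t page_size = 4096;                /* assumed page size */
        const uint64_t entry_size = 8;                  /* ERDMA_MTT_ENTRY_SIZE */
        const uint64_t fanout = page_size / entry_size; /* 512 entries per table page */
        uint64_t data_pages = 1;
        int level;

        for (level = 1; level <= 3; level++) {
                data_pages *= fanout;
                printf("%d page-sized level(s) cover %llu data pages (%llu MiB)\n",
                       level, (unsigned long long)data_pages,
                       (unsigned long long)((data_pages * page_size) >> 20));
        }
        return 0;
}

With one level the coverage is 2 MiB, with two levels 1 GiB, and with three levels 512 GiB, which matches the patch's loop that keeps folding the table until it fits in a single scatter-gather entry and gives up if more than three levels would be required.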
Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20230817102151.75964-4-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_hw.h | 14 ++- drivers/infiniband/hw/erdma/erdma_verbs.c | 200 +++++++++++++++++++++++++++--- drivers/infiniband/hw/erdma/erdma_verbs.h | 4 +- 3 files changed, 194 insertions(+), 24 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index 80a78569bc2a..9d316fdc6f9a 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -248,6 +248,7 @@ struct erdma_cmdq_create_cq_req { /* regmr/deregmr cfg0 */ #define ERDMA_CMD_MR_VALID_MASK BIT(31) +#define ERDMA_CMD_MR_VERSION_MASK GENMASK(30, 28) #define ERDMA_CMD_MR_KEY_MASK GENMASK(27, 20) #define ERDMA_CMD_MR_MPT_IDX_MASK GENMASK(19, 0) @@ -258,6 +259,7 @@ struct erdma_cmdq_create_cq_req { /* regmr cfg2 */ #define ERDMA_CMD_REGMR_PAGESIZE_MASK GENMASK(31, 27) +#define ERDMA_CMD_REGMR_MTT_PAGESIZE_MASK GENMASK(26, 24) #define ERDMA_CMD_REGMR_MTT_LEVEL_MASK GENMASK(21, 20) #define ERDMA_CMD_REGMR_MTT_CNT_MASK GENMASK(19, 0) @@ -268,7 +270,14 @@ struct erdma_cmdq_reg_mr_req { u64 start_va; u32 size; u32 cfg2; - u64 phy_addr[4]; + union { + u64 phy_addr[4]; + struct { + u64 rsvd; + u32 size_h; + u32 mtt_cnt_h; + }; + }; }; struct erdma_cmdq_dereg_mr_req { @@ -309,7 +318,7 @@ struct erdma_cmdq_modify_qp_req { /* create qp mtt_cfg */ #define ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK GENMASK(31, 12) #define ERDMA_CMD_CREATE_QP_MTT_CNT_MASK GENMASK(11, 1) -#define ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK BIT(0) +#define ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK BIT(0) /* create qp db cfg */ #define ERDMA_CMD_CREATE_QP_SQDB_CFG_MASK GENMASK(31, 16) @@ -364,6 +373,7 @@ struct erdma_cmdq_reflush_req { enum { ERDMA_DEV_CAP_FLAGS_ATOMIC = 1 << 7, + ERDMA_DEV_CAP_FLAGS_MTT_VA = 1 << 5, ERDMA_DEV_CAP_FLAGS_EXTEND_DB = 1 << 3, }; diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 26bb8c53683e..dcccb6015232 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -26,13 +26,13 @@ static void assemble_qbuf_mtt_for_cmd(struct erdma_mem *mem, u32 *cfg, if (mem->mtt_nents > ERDMA_MAX_INLINE_MTT_ENTRIES) { *addr0 = mtt->buf_dma; - *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, - ERDMA_MR_INDIRECT_MTT); + *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK, + ERDMA_MR_MTT_1LEVEL); } else { *addr0 = mtt->buf[0]; memcpy(addr1, mtt->buf + 1, MTT_SIZE(mem->mtt_nents - 1)); - *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, - ERDMA_MR_INLINE_MTT); + *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK, + ERDMA_MR_MTT_0LEVEL); } } @@ -70,8 +70,8 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) req.sq_mtt_cfg = FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK, 0) | FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, 1) | - FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, - ERDMA_MR_INLINE_MTT); + FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK, + ERDMA_MR_MTT_0LEVEL); req.rq_mtt_cfg = req.sq_mtt_cfg; req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr; @@ -140,12 +140,17 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) if (mr->type == ERDMA_MR_TYPE_FRMR || mr->mem.page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES) { - req.phy_addr[0] = mr->mem.mtt->buf_dma; - mtt_level = ERDMA_MR_INDIRECT_MTT; + if (mr->mem.mtt->continuous) { + req.phy_addr[0] = 
mr->mem.mtt->buf_dma; + mtt_level = ERDMA_MR_MTT_1LEVEL; + } else { + req.phy_addr[0] = sg_dma_address(mr->mem.mtt->sglist); + mtt_level = mr->mem.mtt->level; + } } else { memcpy(req.phy_addr, mr->mem.mtt->buf, MTT_SIZE(mr->mem.page_cnt)); - mtt_level = ERDMA_MR_INLINE_MTT; + mtt_level = ERDMA_MR_MTT_0LEVEL; } req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) | @@ -167,6 +172,14 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) req.size = mr->mem.len; } + if (!mr->mem.mtt->continuous && mr->mem.mtt->level > 1) { + req.cfg0 |= FIELD_PREP(ERDMA_CMD_MR_VERSION_MASK, 1); + req.cfg2 |= FIELD_PREP(ERDMA_CMD_REGMR_MTT_PAGESIZE_MASK, + PAGE_SHIFT - ERDMA_HW_PAGE_SHIFT); + req.size_h = upper_32_bits(mr->mem.len); + req.mtt_cnt_h = mr->mem.page_cnt >> 20; + } + post_cmd: return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } @@ -194,7 +207,7 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq) req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) | FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK, - ERDMA_MR_INLINE_MTT); + ERDMA_MR_MTT_0LEVEL); req.first_page_offset = 0; req.cq_db_info_addr = @@ -209,13 +222,13 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq) req.qbuf_addr_h = upper_32_bits(mem->mtt->buf[0]); req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK, - ERDMA_MR_INLINE_MTT); + ERDMA_MR_MTT_0LEVEL); } else { req.qbuf_addr_l = lower_32_bits(mem->mtt->buf_dma); req.qbuf_addr_h = upper_32_bits(mem->mtt->buf_dma); req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK, - ERDMA_MR_INDIRECT_MTT); + ERDMA_MR_MTT_1LEVEL); } req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, mem->mtt_nents); @@ -543,7 +556,6 @@ static struct erdma_mtt *erdma_create_cont_mtt(struct erdma_dev *dev, size_t size) { struct erdma_mtt *mtt; - int ret = -ENOMEM; mtt = kzalloc(sizeof(*mtt), GFP_KERNEL); if (!mtt) @@ -565,6 +577,104 @@ static struct erdma_mtt *erdma_create_cont_mtt(struct erdma_dev *dev, err_free_mtt_buf: kfree(mtt->buf); +err_free_mtt: + kfree(mtt); + + return ERR_PTR(-ENOMEM); +} + +static void erdma_destroy_mtt_buf_sg(struct erdma_dev *dev, + struct erdma_mtt *mtt) +{ + dma_unmap_sg(&dev->pdev->dev, mtt->sglist, mtt->nsg, DMA_TO_DEVICE); + vfree(mtt->sglist); +} + +static void erdma_destroy_scatter_mtt(struct erdma_dev *dev, + struct erdma_mtt *mtt) +{ + erdma_destroy_mtt_buf_sg(dev, mtt); + vfree(mtt->buf); + kfree(mtt); +} + +static void erdma_init_middle_mtt(struct erdma_mtt *mtt, + struct erdma_mtt *low_mtt) +{ + struct scatterlist *sg; + u32 idx = 0, i; + + for_each_sg(low_mtt->sglist, sg, low_mtt->nsg, i) + mtt->buf[idx++] = sg_dma_address(sg); +} + +static int erdma_create_mtt_buf_sg(struct erdma_dev *dev, struct erdma_mtt *mtt) +{ + struct scatterlist *sglist; + void *buf = mtt->buf; + u32 npages, i, nsg; + struct page *pg; + + /* Failed if buf is not page aligned */ + if ((uintptr_t)buf & ~PAGE_MASK) + return -EINVAL; + + npages = DIV_ROUND_UP(mtt->size, PAGE_SIZE); + sglist = vzalloc(npages * sizeof(*sglist)); + if (!sglist) + return -ENOMEM; + + sg_init_table(sglist, npages); + for (i = 0; i < npages; i++) { + pg = vmalloc_to_page(buf); + if (!pg) + goto err; + sg_set_page(&sglist[i], pg, PAGE_SIZE, 0); + buf += PAGE_SIZE; + } + + nsg = dma_map_sg(&dev->pdev->dev, sglist, npages, DMA_TO_DEVICE); + if (!nsg) + goto err; + + mtt->sglist = sglist; + mtt->nsg = nsg; + + return 0; +err: + vfree(sglist); + + return -ENOMEM; +} + +static struct erdma_mtt *erdma_create_scatter_mtt(struct 
erdma_dev *dev, + size_t size) +{ + struct erdma_mtt *mtt; + int ret = -ENOMEM; + + mtt = kzalloc(sizeof(*mtt), GFP_KERNEL); + if (!mtt) + return NULL; + + mtt->size = ALIGN(size, PAGE_SIZE); + mtt->buf = vzalloc(mtt->size); + mtt->continuous = false; + if (!mtt->buf) + goto err_free_mtt; + + ret = erdma_create_mtt_buf_sg(dev, mtt); + if (ret) + goto err_free_mtt_buf; + + ibdev_dbg(&dev->ibdev, "create scatter mtt, size:%lu, nsg:%u\n", + mtt->size, mtt->nsg); + + return mtt; + +err_free_mtt_buf: + vfree(mtt->buf); + err_free_mtt: kfree(mtt); @@ -574,28 +684,77 @@ err_free_mtt: static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size, bool force_continuous) { + struct erdma_mtt *mtt, *tmp_mtt; + int ret, level = 0; + ibdev_dbg(&dev->ibdev, "create_mtt, size:%lu, force cont:%d\n", size, force_continuous); + if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_MTT_VA)) + force_continuous = true; + if (force_continuous) return erdma_create_cont_mtt(dev, size); - return ERR_PTR(-EOPNOTSUPP); + mtt = erdma_create_scatter_mtt(dev, size); + if (IS_ERR(mtt)) + return mtt; + level = 1; + + /* convergence the mtt table. */ + while (mtt->nsg != 1 && level <= 3) { + tmp_mtt = erdma_create_scatter_mtt(dev, MTT_SIZE(mtt->nsg)); + if (IS_ERR(tmp_mtt)) { + ret = PTR_ERR(tmp_mtt); + goto err_free_mtt; + } + erdma_init_middle_mtt(tmp_mtt, mtt); + tmp_mtt->low_level = mtt; + mtt = tmp_mtt; + level++; + } + + if (level > 3) { + ret = -ENOMEM; + goto err_free_mtt; + } + + mtt->level = level; + ibdev_dbg(&dev->ibdev, "top mtt: level:%d, dma_addr 0x%llx\n", + mtt->level, mtt->sglist[0].dma_address); + + return mtt; +err_free_mtt: + while (mtt) { + tmp_mtt = mtt->low_level; + erdma_destroy_scatter_mtt(dev, mtt); + mtt = tmp_mtt; + } + + return ERR_PTR(ret); } static void erdma_destroy_mtt(struct erdma_dev *dev, struct erdma_mtt *mtt) { + struct erdma_mtt *tmp_mtt; + if (mtt->continuous) { dma_unmap_single(&dev->pdev->dev, mtt->buf_dma, mtt->size, DMA_TO_DEVICE); kfree(mtt->buf); kfree(mtt); + } else { + while (mtt) { + tmp_mtt = mtt->low_level; + erdma_destroy_scatter_mtt(dev, mtt); + mtt = tmp_mtt; + } } } static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem, u64 start, u64 len, int access, u64 virt, - unsigned long req_page_size, u8 force_indirect_mtt) + unsigned long req_page_size, bool force_continuous) { int ret = 0; @@ -612,7 +771,8 @@ static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem, mem->page_offset = start & (mem->page_size - 1); mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size); mem->page_cnt = mem->mtt_nents; - mem->mtt = erdma_create_mtt(dev, MTT_SIZE(mem->page_cnt), true); + mem->mtt = erdma_create_mtt(dev, MTT_SIZE(mem->page_cnt), + force_continuous); if (IS_ERR(mem->mtt)) { ret = PTR_ERR(mem->mtt); goto error_ret; @@ -717,7 +877,7 @@ static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx, ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mem, va, qp->attrs.sq_size << SQEBB_SHIFT, 0, va, - (SZ_1M - SZ_4K), 1); + (SZ_1M - SZ_4K), true); if (ret) return ret; @@ -726,7 +886,7 @@ static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx, ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mem, va + rq_offset, qp->attrs.rq_size << RQE_SHIFT, 0, va + rq_offset, - (SZ_1M - SZ_4K), 1); + (SZ_1M - SZ_4K), true); if (ret) goto put_sq_mtt; @@ -998,7 +1158,7 @@ struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, return ERR_PTR(-ENOMEM); ret = get_mtt_entries(dev, &mr->mem, start, len, access, 
virt, - SZ_2G - SZ_4K, 0); + SZ_2G - SZ_4K, false); if (ret) goto err_out_free; @@ -1423,7 +1583,7 @@ static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq, ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mem, ureq->qbuf_va, ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K, - 1); + true); if (ret) return ret; diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index 5f639f27a8a9..eb9c0f92fb6f 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -73,8 +73,8 @@ struct erdma_pd { #define ERDMA_MR_TYPE_FRMR 1 #define ERDMA_MR_TYPE_DMA 2 -#define ERDMA_MR_INLINE_MTT 0 -#define ERDMA_MR_INDIRECT_MTT 1 +#define ERDMA_MR_MTT_0LEVEL 0 +#define ERDMA_MR_MTT_1LEVEL 1 #define ERDMA_MR_ACC_RA BIT(0) #define ERDMA_MR_ACC_LR BIT(1) -- cgit From bb6d73d9add68ad270888db327514384dfa44958 Mon Sep 17 00:00:00 2001 From: Christopher Bednarz Date: Fri, 18 Aug 2023 09:48:38 -0500 Subject: RDMA/irdma: Prevent zero-length STAG registration Currently irdma allows zero-length STAGs to be programmed in HW during the kernel mode fast register flow. Zero-length MR or STAG registration disable HW memory length checks. Improve gaps in bounds checking in irdma by preventing zero-length STAG or MR registrations except if the IB_PD_UNSAFE_GLOBAL_RKEY is set. This addresses the disclosure CVE-2023-25775. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Christopher Bednarz Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230818144838.1758-1-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/ctrl.c | 6 ++++++ drivers/infiniband/hw/irdma/type.h | 2 ++ drivers/infiniband/hw/irdma/verbs.c | 10 ++++++++-- 3 files changed, 16 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index b1fdddd2fa1a..55421a92882c 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -1061,6 +1061,9 @@ static int irdma_sc_alloc_stag(struct irdma_sc_dev *dev, u64 hdr; enum irdma_page_size page_size; + if (!info->total_len && !info->all_memory) + return -EINVAL; + if (info->page_size == 0x40000000) page_size = IRDMA_PAGE_SIZE_1G; else if (info->page_size == 0x200000) @@ -1126,6 +1129,9 @@ static int irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev, u8 addr_type; enum irdma_page_size page_size; + if (!info->total_len && !info->all_memory) + return -EINVAL; + if (info->page_size == 0x40000000) page_size = IRDMA_PAGE_SIZE_1G; else if (info->page_size == 0x200000) diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index b49a98c208bf..bee9609f4be7 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -969,6 +969,7 @@ struct irdma_allocate_stag_info { bool remote_access:1; bool use_hmc_fcn_index:1; bool use_pf_rid:1; + bool all_memory:1; u8 hmc_fcn_index; }; @@ -996,6 +997,7 @@ struct irdma_reg_ns_stag_info { bool use_hmc_fcn_index:1; u8 hmc_fcn_index; bool use_pf_rid:1; + bool all_memory:1; }; struct irdma_fast_reg_stag_info { diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 6cffe21558fe..3eb7a7a3a975 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2624,7 +2624,8 @@ static int irdma_hw_alloc_stag(struct irdma_device *iwdev, struct irdma_mr *iwmr) { struct 
irdma_allocate_stag_info *info; - struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd); + struct ib_pd *pd = iwmr->ibmr.pd; + struct irdma_pd *iwpd = to_iwpd(pd); int status; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; @@ -2640,6 +2641,7 @@ static int irdma_hw_alloc_stag(struct irdma_device *iwdev, info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S; info->pd_id = iwpd->sc_pd.pd_id; info->total_len = iwmr->len; + info->all_memory = pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY; info->remote_access = true; cqp_info->cqp_cmd = IRDMA_OP_ALLOC_STAG; cqp_info->post_sq = 1; @@ -2687,6 +2689,8 @@ static struct ib_mr *irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, iwmr->type = IRDMA_MEMREG_TYPE_MEM; palloc = &iwpbl->pble_alloc; iwmr->page_cnt = max_num_sg; + /* Use system PAGE_SIZE as the sg page sizes are unknown at this point */ + iwmr->len = max_num_sg * PAGE_SIZE; err_code = irdma_get_pble(iwdev->rf->pble_rsrc, palloc, iwmr->page_cnt, false); if (err_code) @@ -2766,7 +2770,8 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr, { struct irdma_pbl *iwpbl = &iwmr->iwpbl; struct irdma_reg_ns_stag_info *stag_info; - struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd); + struct ib_pd *pd = iwmr->ibmr.pd; + struct irdma_pd *iwpd = to_iwpd(pd); struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; @@ -2785,6 +2790,7 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr, stag_info->total_len = iwmr->len; stag_info->access_rights = irdma_get_mr_access(access); stag_info->pd_id = iwpd->sc_pd.pd_id; + stag_info->all_memory = pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY; if (stag_info->access_rights & IRDMA_ACCESS_FLAGS_ZERO_BASED) stag_info->addr_type = IRDMA_ADDR_TYPE_ZERO_BASED; else -- cgit From c6c0052df25ab9f37f8b29e9d55a9c877f3c6c5f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 20 Aug 2023 16:53:15 +0300 Subject: RDMA/bnxt_re: Fix kernel doc errors Fix set of the following errors due to use of wrong kernel doc format to describe function parameters: drivers/infiniband/hw/bnxt_re/qplib_rcfw.c:68: warning: Function parameter or member 'rcfw' not described in '__wait_for_resp' Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202308180600.oOnkIAQV-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202308180401.iaj2ktTc-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202308180214.Lt9NAhbM-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202308180055.6zM4AK6V-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202308172136.ipx1wvs6-lkp@intel.com/ Link: https://lore.kernel.org/r/4b22c385f1b68590ace8f82f2985d14b20999432.1692539554.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 287117ec50ee..524a5ff58872 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -55,7 +55,7 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t); /** * bnxt_qplib_map_rc - map return type based on opcode - * @opcode - roce slow path opcode + * @opcode: roce slow path opcode * * case #1 * Firmware initiated error recovery is a safe state machine and @@ -98,8 +98,8 @@ static int 
bnxt_qplib_map_rc(u8 opcode) /** * bnxt_re_is_fw_stalled - Check firmware health - * @rcfw - rcfw channel instance of rdev - * @cookie - cookie to track the command + * @rcfw: rcfw channel instance of rdev + * @cookie: cookie to track the command * * If firmware has not responded any rcfw command within * rcfw->max_timeout, consider firmware as stalled. @@ -133,8 +133,8 @@ static int bnxt_re_is_fw_stalled(struct bnxt_qplib_rcfw *rcfw, /** * __wait_for_resp - Don't hold the cpu context and wait for response - * @rcfw - rcfw channel instance of rdev - * @cookie - cookie to track the command + * @rcfw: rcfw channel instance of rdev + * @cookie: cookie to track the command * * Wait for command completion in sleepable context. * @@ -179,8 +179,8 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) /** * __block_for_resp - hold the cpu context and wait for response - * @rcfw - rcfw channel instance of rdev - * @cookie - cookie to track the command + * @rcfw: rcfw channel instance of rdev + * @cookie: cookie to track the command * * This function will hold the cpu (non-sleepable context) and * wait for command completion. Maximum holding interval is 8 second. @@ -216,8 +216,8 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) }; /* __send_message_no_waiter - get cookie and post the message. - * @rcfw - rcfw channel instance of rdev - * @msg - qplib message internal + * @rcfw: rcfw channel instance of rdev + * @msg: qplib message internal * * This function will just post and don't bother about completion. * Current design of this function is - @@ -374,8 +374,8 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, /** * __poll_for_resp - self poll completion for rcfw command - * @rcfw - rcfw channel instance of rdev - * @cookie - cookie to track the command + * @rcfw: rcfw channel instance of rdev + * @cookie: cookie to track the command * * It works same as __wait_for_resp except this function will * do self polling in sort interval since interrupt is disabled. @@ -471,8 +471,8 @@ static void __destroy_timedout_ah(struct bnxt_qplib_rcfw *rcfw, /** * __bnxt_qplib_rcfw_send_message - qplib interface to send * and complete rcfw command. - * @rcfw - rcfw channel instance of rdev - * @msg - qplib message internal + * @rcfw: rcfw channel instance of rdev + * @msg: qplib message internal * * This function does not account shadow queue depth. It will send * all the command unconditionally as long as send queue is not full. @@ -534,8 +534,8 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, /** * bnxt_qplib_rcfw_send_message - qplib interface to send * and complete rcfw command. - * @rcfw - rcfw channel instance of rdev - * @msg - qplib message internal + * @rcfw: rcfw channel instance of rdev + * @msg: qplib message internal * * Driver interact with Firmware through rcfw channel/slow path in two ways. * a. Blocking rcfw command send. In this path, driver cannot hold -- cgit From dfe261107c080709459c32695847eec96238852b Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 21 Aug 2023 10:57:14 +0300 Subject: Revert "IB/isert: Fix incorrect release of isert connection" Commit: 699826f4e30a ("IB/isert: Fix incorrect release of isert connection") is causing problems on OPA when DEVICE_REMOVAL is happening. 
------------[ cut here ]------------ WARNING: CPU: 52 PID: 2117247 at drivers/infiniband/core/cq.c:359 ib_cq_pool_cleanup+0xac/0xb0 [ib_core] Modules linked in: nfsd nfs_acl target_core_user uio tcm_fc libfc scsi_transport_fc tcm_loop target_core_pscsi target_core_iblock target_core_file rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs rfkill rpcrdma rdma_ucm ib_srpt sunrpc ib_isert iscsi_target_mod target_core_mod opa_vnic ib_iser libiscsi ib_umad scsi_transport_iscsi rdma_cm ib_ipoib iw_cm ib_cm hfi1(-) rdmavt ib_uverbs intel_rapl_msr intel_rapl_common sb_edac ib_core x86_pkg_temp_thermal intel_powerclamp coretemp i2c_i801 mxm_wmi rapl iTCO_wdt ipmi_si iTCO_vendor_support mei_me ipmi_devintf mei intel_cstate ioatdma intel_uncore i2c_smbus joydev pcspkr lpc_ich ipmi_msghandler acpi_power_meter acpi_pad xfs libcrc32c sr_mod sd_mod cdrom t10_pi sg crct10dif_pclmul crc32_pclmul crc32c_intel drm_kms_helper drm_shmem_helper ahci libahci ghash_clmulni_intel igb drm libata dca i2c_algo_bit wmi fuse CPU: 52 PID: 2117247 Comm: modprobe Not tainted 6.5.0-rc1+ #1 Hardware name: Intel Corporation S2600CWR/S2600CW, BIOS SE5C610.86B.01.01.0014.121820151719 12/18/2015 RIP: 0010:ib_cq_pool_cleanup+0xac/0xb0 [ib_core] Code: ff 48 8b 43 40 48 8d 7b 40 48 83 e8 40 4c 39 e7 75 b3 49 83 c4 10 4d 39 fc 75 94 5b 5d 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc <0f> 0b eb a1 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f RSP: 0018:ffffc10bea13fc80 EFLAGS: 00010206 RAX: 000000000000010c RBX: ffff9bf5c7e66c00 RCX: 000000008020001d RDX: 000000008020001e RSI: fffff175221f9900 RDI: ffff9bf5c7e67640 RBP: ffff9bf5c7e67600 R08: ffff9bf5c7e64400 R09: 000000008020001d R10: 0000000040000000 R11: 0000000000000000 R12: ffff9bee4b1e8a18 R13: dead000000000122 R14: dead000000000100 R15: ffff9bee4b1e8a38 FS: 00007ff1e6d38740(0000) GS:ffff9bfd9fb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005652044ecc68 CR3: 0000000889b5c005 CR4: 00000000001706e0 Call Trace: ? __warn+0x80/0x130 ? ib_cq_pool_cleanup+0xac/0xb0 [ib_core] ? report_bug+0x195/0x1a0 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x14/0x70 ? asm_exc_invalid_op+0x16/0x20 ? ib_cq_pool_cleanup+0xac/0xb0 [ib_core] disable_device+0x9d/0x160 [ib_core] __ib_unregister_device+0x42/0xb0 [ib_core] ib_unregister_device+0x22/0x30 [ib_core] rvt_unregister_device+0x20/0x90 [rdmavt] hfi1_unregister_ib_device+0x16/0xf0 [hfi1] remove_one+0x55/0x1a0 [hfi1] pci_device_remove+0x36/0xa0 device_release_driver_internal+0x193/0x200 driver_detach+0x44/0x90 bus_remove_driver+0x69/0xf0 pci_unregister_driver+0x2a/0xb0 hfi1_mod_cleanup+0xc/0x3c [hfi1] __do_sys_delete_module.constprop.0+0x17a/0x2f0 ? exit_to_user_mode_prepare+0xc4/0xd0 ? syscall_trace_enter.constprop.0+0x126/0x1a0 do_syscall_64+0x5c/0x90 ? syscall_exit_to_user_mode+0x12/0x30 ? do_syscall_64+0x69/0x90 ? syscall_exit_work+0x103/0x130 ? syscall_exit_to_user_mode+0x12/0x30 ? do_syscall_64+0x69/0x90 ? 
exc_page_fault+0x65/0x150 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 RIP: 0033:0x7ff1e643f5ab Code: 73 01 c3 48 8b 0d 75 a8 1b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 45 a8 1b 00 f7 d8 64 89 01 48 RSP: 002b:00007ffec9103cc8 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 00005615267fdc50 RCX: 00007ff1e643f5ab RDX: 0000000000000000 RSI: 0000000000000800 RDI: 00005615267fdcb8 RBP: 00005615267fdc50 R08: 0000000000000000 R09: 0000000000000000 R10: 00007ff1e659eac0 R11: 0000000000000206 R12: 00005615267fdcb8 R13: 0000000000000000 R14: 00005615267fdcb8 R15: 00007ffec9105ff8 ---[ end trace 0000000000000000 ]--- And... restrack: ------------[ cut here ]------------ infiniband hfi1_0: BUG: RESTRACK detected leak of resources restrack: Kernel PD object allocated by ib_isert is not freed restrack: Kernel CQ object allocated by ib_core is not freed restrack: Kernel QP object allocated by rdma_cm is not freed restrack: ------------[ cut here ]------------ Fixes: 699826f4e30a ("IB/isert: Fix incorrect release of isert connection") Reported-by: Dennis Dalessandro Closes: https://lore.kernel.org/all/921cd1d9-2879-f455-1f50-0053fe6a6655@cornelisnetworks.com Link: https://lore.kernel.org/r/a27982d3235005c58f6d321f3fad5eb6e1beaf9e.1692604607.git.leonro@nvidia.com Tested-by: Dennis Dalessandro Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/isert/ib_isert.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 92e1e7587af8..00a7303c8cc6 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2570,6 +2570,8 @@ static void isert_wait_conn(struct iscsit_conn *conn) isert_put_unsol_pending_cmds(conn); isert_wait4cmds(conn); isert_wait4logout(isert_conn); + + queue_work(isert_release_wq, &isert_conn->release_work); } static void isert_free_conn(struct iscsit_conn *conn) -- cgit From b056327bee09e6b86683d3f709a438ccd6031d72 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 21 Aug 2023 21:32:53 +0800 Subject: RDMA/siw: Balance the reference of cep->kref in the error path The siw_connect can go to err in below after cep is allocated successfully: 1. If siw_cm_alloc_work returns failure. In this case socket is not assoicated with cep so siw_cep_put can't be called by siw_socket_disassoc. We need to call siw_cep_put twice since cep->kref is increased once after it was initialized. 2. If siw_cm_queue_work can't find a work, which means siw_cep_get is not called in siw_cm_queue_work, so cep->kref is increased twice by siw_cep_get and when associate socket with cep after it was initialized. So we need to call siw_cep_put three times (one in siw_socket_disassoc). 3. siw_send_mpareqrep returns error, this scenario is similar as 2. So we need to remove one siw_cep_put in the error path. 
Fixes: 6c52fdc244b5 ("rdma/siw: connection management") Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20230821133255.31111-2-guoqing.jiang@linux.dev Acked-by: Bernard Metzler Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_cm.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index da530c0404da..a2605178f4ed 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1501,7 +1501,6 @@ error: cep->cm_id = NULL; id->rem_ref(id); - siw_cep_put(cep); qp->cep = NULL; siw_cep_put(cep); -- cgit From bee024d20451e4ce04ea30099cad09f7f75d288b Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 21 Aug 2023 21:32:54 +0800 Subject: RDMA/siw: Correct wrong debug message We need to print num_sle first then pbl->max_buf per the condition. Also replace mem->pbl with pbl while at it. Fixes: 303ae1cdfdf7 ("rdma/siw: application interface") Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20230821133255.31111-3-guoqing.jiang@linux.dev Acked-by: Bernard Metzler Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index fadfa70853f3..fdbef3254e30 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -1494,7 +1494,7 @@ int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, if (pbl->max_buf < num_sle) { siw_dbg_mem(mem, "too many SGE's: %d > %d\n", - mem->pbl->max_buf, num_sle); + num_sle, pbl->max_buf); return -ENOMEM; } for_each_sg(sl, slp, num_sle, i) { -- cgit From 9dfccb6d0d3d13347c61ff0136b22d5d772d2075 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 21 Aug 2023 21:32:55 +0800 Subject: RDMA/siw: Call llist_reverse_order in siw_run_sq We can call the function to get fifo list. Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20230821133255.31111-4-guoqing.jiang@linux.dev Acked-by: Bernard Metzler Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_qp_tx.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index 3ff339eceec3..60b6a4135961 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -1271,13 +1271,7 @@ int siw_run_sq(void *data) * llist_del_all returns a list with newest entry first. * Re-order list for fairness among QP's. */ - while (active) { - struct llist_node *tmp = active; - - active = llist_next(active); - tmp->next = fifo_list; - fifo_list = tmp; - } + fifo_list = llist_reverse_order(active); while (fifo_list) { qp = container_of(fifo_list, struct siw_qp, tx_list); fifo_list = llist_next(fifo_list); -- cgit From dc202c57e9a1423aed528e4b8dc949509cd32191 Mon Sep 17 00:00:00 2001 From: Yonatan Nachum Date: Tue, 22 Aug 2023 08:27:25 +0000 Subject: RDMA/efa: Fix wrong resources deallocation order When trying to destroy QP or CQ, we first decrease the refcount and potentially free memory regions allocated for the object and then request the device to destroy the object. 
If the device fails, the object isn't fully destroyed so the user/IB core can try to destroy the object again which will lead to underflow when trying to decrease an already zeroed refcount. Deallocate resources in reverse order of allocating them to safely free them. Fixes: ff6629f88c52 ("RDMA/efa: Do not delay freeing of DMA pages") Reviewed-by: Michael Margolin Reviewed-by: Yossi Leybovich Signed-off-by: Yonatan Nachum Link: https://lore.kernel.org/r/20230822082725.31719-1-ynachum@amazon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/efa/efa_verbs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 7a27d79c0541..0f8ca99d0827 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -453,12 +453,12 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num); - efa_qp_user_mmap_entries_remove(qp); - err = efa_destroy_qp_handle(dev, qp->qp_handle); if (err) return err; + efa_qp_user_mmap_entries_remove(qp); + if (qp->rq_cpu_addr) { ibdev_dbg(&dev->ibdev, "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n", @@ -1017,8 +1017,8 @@ int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n", cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr); - efa_cq_user_mmap_entries_remove(cq); efa_destroy_cq_idx(dev, cq->cq_idx); + efa_cq_user_mmap_entries_remove(cq); if (cq->eq) { xa_erase(&dev->cqs_xa, cq->cq_idx); synchronize_irq(cq->eq->irq.irqn); -- cgit From 6812e06999054792e13193666989dbdc01642625 Mon Sep 17 00:00:00 2001 From: Rohit Chavan Date: Tue, 22 Aug 2023 14:43:04 +0530 Subject: RDMA/rxe: Fix redundant break statement in switch-case. Removed unreachable break statement after return. Signed-off-by: Rohit Chavan Link: https://lore.kernel.org/r/20230822091304.7312-1-roheetchavan@gmail.com Acked-by: Zhu Yanjun Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_verbs.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 903f0b71447e..48f86839d36a 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -798,7 +798,6 @@ static int init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, rxe_err_qp(qp, "unsupported wr opcode %d", wr->opcode); return -EINVAL; - break; } } -- cgit From d3c2245754220b0fd4c6868e2fe48741a734be58 Mon Sep 17 00:00:00 2001 From: Rohit Chavan Date: Tue, 22 Aug 2023 17:34:51 +0530 Subject: RDMA/mlx5: Fix trailing */ formatting in block comment Resolved a formatting issue where the trailing */ in a block comment was placed on a same line instead of separate line. 
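For context, both of the kernel's documented block-comment styles end with the closing */ on a line of its own; only the opening line differs between net/ and the rest of the tree. A minimal illustration of the discouraged and the corrected form:

/* Discouraged: the closing marker shares the final text line.
 * Explanatory text continues here. */

/* Corrected: the closing marker sits on a line of its own.
 * Explanatory text continues here.
 */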
Signed-off-by: Rohit Chavan Link: https://lore.kernel.org/r/20230822120451.8215-1-roheetchavan@gmail.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 92f35fafb2c0..3e345ef380f1 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1235,7 +1235,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, } /* The pg_access bit allows setting the access flags - * in the page list submitted with the command. */ + * in the page list submitted with the command. + */ MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); -- cgit From 3d91dfe72aac335e1c3f33de8bda537c026ccc8e Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Tue, 22 Aug 2023 11:35:38 +0800 Subject: RDMA/hfi1: Use list_for_each_entry() helper Convert list_for_each() to list_for_each_entry() so that the pos list_head pointer and list_entry() call are no longer needed, which can reduce a few lines of code. No functional changed. Signed-off-by: Jinjie Ruan Link: https://lore.kernel.org/r/20230822033539.3692453-1-ruanjinjie@huawei.com Acked-by: Dennis Dalessandro Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/affinity.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 77ee77d4000f..bbc957c578e1 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -230,11 +230,9 @@ static void node_affinity_add_tail(struct hfi1_affinity_node *entry) /* It must be called with node_affinity.lock held */ static struct hfi1_affinity_node *node_affinity_lookup(int node) { - struct list_head *pos; struct hfi1_affinity_node *entry; - list_for_each(pos, &node_affinity.list) { - entry = list_entry(pos, struct hfi1_affinity_node, list); + list_for_each_entry(entry, &node_affinity.list, list) { if (entry->node == node) return entry; } -- cgit From d2c0234634533784b2fe0f86f1006489adb55876 Mon Sep 17 00:00:00 2001 From: Brendan Cunningham Date: Tue, 22 Aug 2023 10:07:53 -0400 Subject: RDMA/hfi1: Move user SDMA system memory pinning code to its own file Move user SDMA system memory page-pinning code from user_sdma.c to pin_system.c. Put declarations for non-static functions in pinning.h. System memory pinning is necessary for processing user SDMA requests but actual steps are invisible to user SDMA request-processing code. Moving system memory pinning code for user SDMA to its own file makes this distinction apparent. These changes have no effect on userspace. 
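The new pinning.h itself is not reproduced in this excerpt, so the following is only a plausible reconstruction based on the two non-static functions visible in pin_system.c below; the real 20-line header may declare more:

/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
/*
 * Hypothetical sketch of pinning.h, inferred from pin_system.c.
 */
#ifndef _HFI1_PINNING_H
#define _HFI1_PINNING_H

struct hfi1_user_sdma_pkt_q;

int hfi1_init_system_pinning(struct hfi1_user_sdma_pkt_q *pq);
void hfi1_free_system_pinning(struct hfi1_user_sdma_pkt_q *pq);

#endif /* _HFI1_PINNING_H */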
Signed-off-by: Patrick Kelsey Signed-off-by: Brendan Cunningham Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/169271327311.1855761.4736714053318724062.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/Makefile | 1 + drivers/infiniband/hw/hfi1/hfi.h | 4 +- drivers/infiniband/hw/hfi1/pin_system.c | 474 ++++++++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/pinning.h | 20 ++ drivers/infiniband/hw/hfi1/user_sdma.c | 441 +---------------------------- drivers/infiniband/hw/hfi1/user_sdma.h | 17 +- 6 files changed, 505 insertions(+), 452 deletions(-) create mode 100644 drivers/infiniband/hw/hfi1/pin_system.c create mode 100644 drivers/infiniband/hw/hfi1/pinning.h (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 2e89ec10efed..5d977f363684 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -31,6 +31,7 @@ hfi1-y := \ netdev_rx.o \ opfn.o \ pcie.o \ + pin_system.o \ pio.o \ pio_copy.o \ platform.o \ diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 7fa9cd39254f..38772e52d7ed 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* - * Copyright(c) 2020 Cornelis Networks, Inc. + * Copyright(c) 2020-2023 Cornelis Networks, Inc. * Copyright(c) 2015-2020 Intel Corporation. */ @@ -1378,8 +1378,6 @@ struct hfi1_devdata { #define PT_INVALID 3 struct tid_rb_node; -struct mmu_rb_node; -struct mmu_rb_handler; /* Private data for file operations */ struct hfi1_filedata { diff --git a/drivers/infiniband/hw/hfi1/pin_system.c b/drivers/infiniband/hw/hfi1/pin_system.c new file mode 100644 index 000000000000..384f722093e0 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/pin_system.c @@ -0,0 +1,474 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause +/* + * Copyright(c) 2023 - Cornelis Networks, Inc. 
+ */ + +#include + +#include "hfi.h" +#include "common.h" +#include "device.h" +#include "pinning.h" +#include "mmu_rb.h" +#include "user_sdma.h" +#include "trace.h" + +struct sdma_mmu_node { + struct mmu_rb_node rb; + struct hfi1_user_sdma_pkt_q *pq; + struct page **pages; + unsigned int npages; +}; + +static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, + unsigned long len); +static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, void *arg2, + bool *stop); +static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode); + +static struct mmu_rb_ops sdma_rb_ops = { + .filter = sdma_rb_filter, + .evict = sdma_rb_evict, + .remove = sdma_rb_remove, +}; + +int hfi1_init_system_pinning(struct hfi1_user_sdma_pkt_q *pq) +{ + struct hfi1_devdata *dd = pq->dd; + int ret; + + ret = hfi1_mmu_rb_register(pq, &sdma_rb_ops, dd->pport->hfi1_wq, + &pq->handler); + if (ret) + dd_dev_err(dd, + "[%u:%u] Failed to register system memory DMA support with MMU: %d\n", + pq->ctxt, pq->subctxt, ret); + return ret; +} + +void hfi1_free_system_pinning(struct hfi1_user_sdma_pkt_q *pq) +{ + if (pq->handler) + hfi1_mmu_rb_unregister(pq->handler); +} + +static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages) +{ + struct evict_data evict_data; + + evict_data.cleared = 0; + evict_data.target = npages; + hfi1_mmu_rb_evict(pq->handler, &evict_data); + return evict_data.cleared; +} + +static void unpin_vector_pages(struct mm_struct *mm, struct page **pages, + unsigned int start, unsigned int npages) +{ + hfi1_release_user_pages(mm, pages + start, npages, false); + kfree(pages); +} + +static inline struct mm_struct *mm_from_sdma_node(struct sdma_mmu_node *node) +{ + return node->rb.handler->mn.mm; +} + +static void free_system_node(struct sdma_mmu_node *node) +{ + if (node->npages) { + unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0, + node->npages); + atomic_sub(node->npages, &node->pq->n_locked); + } + kfree(node); +} + +/* + * kref_get()'s an additional kref on the returned rb_node to prevent rb_node + * from being released until after rb_node is assigned to an SDMA descriptor + * (struct sdma_desc) under add_system_iovec_to_sdma_packet(), even if the + * virtual address range for rb_node is invalidated between now and then. 
+ */ +static struct sdma_mmu_node *find_system_node(struct mmu_rb_handler *handler, + unsigned long start, + unsigned long end) +{ + struct mmu_rb_node *rb_node; + unsigned long flags; + + spin_lock_irqsave(&handler->lock, flags); + rb_node = hfi1_mmu_rb_get_first(handler, start, (end - start)); + if (!rb_node) { + spin_unlock_irqrestore(&handler->lock, flags); + return NULL; + } + + /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */ + kref_get(&rb_node->refcount); + spin_unlock_irqrestore(&handler->lock, flags); + + return container_of(rb_node, struct sdma_mmu_node, rb); +} + +static int pin_system_pages(struct user_sdma_request *req, + uintptr_t start_address, size_t length, + struct sdma_mmu_node *node, int npages) +{ + struct hfi1_user_sdma_pkt_q *pq = req->pq; + int pinned, cleared; + struct page **pages; + + pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); + if (!pages) + return -ENOMEM; + +retry: + if (!hfi1_can_pin_pages(pq->dd, current->mm, atomic_read(&pq->n_locked), + npages)) { + SDMA_DBG(req, "Evicting: nlocked %u npages %u", + atomic_read(&pq->n_locked), npages); + cleared = sdma_cache_evict(pq, npages); + if (cleared >= npages) + goto retry; + } + + SDMA_DBG(req, "Acquire user pages start_address %lx node->npages %u npages %u", + start_address, node->npages, npages); + pinned = hfi1_acquire_user_pages(current->mm, start_address, npages, 0, + pages); + + if (pinned < 0) { + kfree(pages); + SDMA_DBG(req, "pinned %d", pinned); + return pinned; + } + if (pinned != npages) { + unpin_vector_pages(current->mm, pages, node->npages, pinned); + SDMA_DBG(req, "npages %u pinned %d", npages, pinned); + return -EFAULT; + } + node->rb.addr = start_address; + node->rb.len = length; + node->pages = pages; + node->npages = npages; + atomic_add(pinned, &pq->n_locked); + SDMA_DBG(req, "done. pinned %d", pinned); + return 0; +} + +/* + * kref refcount on *node_p will be 2 on successful addition: one kref from + * kref_init() for mmu_rb_handler and one kref to prevent *node_p from being + * released until after *node_p is assigned to an SDMA descriptor (struct + * sdma_desc) under add_system_iovec_to_sdma_packet(), even if the virtual + * address range for *node_p is invalidated between now and then. 
+ */ +static int add_system_pinning(struct user_sdma_request *req, + struct sdma_mmu_node **node_p, + unsigned long start, unsigned long len) + +{ + struct hfi1_user_sdma_pkt_q *pq = req->pq; + struct sdma_mmu_node *node; + int ret; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + /* First kref "moves" to mmu_rb_handler */ + kref_init(&node->rb.refcount); + + /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */ + kref_get(&node->rb.refcount); + + node->pq = pq; + ret = pin_system_pages(req, start, len, node, PFN_DOWN(len)); + if (ret == 0) { + ret = hfi1_mmu_rb_insert(pq->handler, &node->rb); + if (ret) + free_system_node(node); + else + *node_p = node; + + return ret; + } + + kfree(node); + return ret; +} + +static int get_system_cache_entry(struct user_sdma_request *req, + struct sdma_mmu_node **node_p, + size_t req_start, size_t req_len) +{ + struct hfi1_user_sdma_pkt_q *pq = req->pq; + u64 start = ALIGN_DOWN(req_start, PAGE_SIZE); + u64 end = PFN_ALIGN(req_start + req_len); + int ret; + + if ((end - start) == 0) { + SDMA_DBG(req, + "Request for empty cache entry req_start %lx req_len %lx start %llx end %llx", + req_start, req_len, start, end); + return -EINVAL; + } + + SDMA_DBG(req, "req_start %lx req_len %lu", req_start, req_len); + + while (1) { + struct sdma_mmu_node *node = + find_system_node(pq->handler, start, end); + u64 prepend_len = 0; + + SDMA_DBG(req, "node %p start %llx end %llu", node, start, end); + if (!node) { + ret = add_system_pinning(req, node_p, start, + end - start); + if (ret == -EEXIST) { + /* + * Another execution context has inserted a + * conficting entry first. + */ + continue; + } + return ret; + } + + if (node->rb.addr <= start) { + /* + * This entry covers at least part of the region. If it doesn't extend + * to the end, then this will be called again for the next segment. + */ + *node_p = node; + return 0; + } + + SDMA_DBG(req, "prepend: node->rb.addr %lx, node->rb.refcount %d", + node->rb.addr, kref_read(&node->rb.refcount)); + prepend_len = node->rb.addr - start; + + /* + * This node will not be returned, instead a new node + * will be. So release the reference. + */ + kref_put(&node->rb.refcount, hfi1_mmu_rb_release); + + /* Prepend a node to cover the beginning of the allocation */ + ret = add_system_pinning(req, node_p, start, prepend_len); + if (ret == -EEXIST) { + /* Another execution context has inserted a conficting entry first. */ + continue; + } + return ret; + } +} + +static void sdma_mmu_rb_node_get(void *ctx) +{ + struct mmu_rb_node *node = ctx; + + kref_get(&node->refcount); +} + +static void sdma_mmu_rb_node_put(void *ctx) +{ + struct sdma_mmu_node *node = ctx; + + kref_put(&node->rb.refcount, hfi1_mmu_rb_release); +} + +static int add_mapping_to_sdma_packet(struct user_sdma_request *req, + struct user_sdma_txreq *tx, + struct sdma_mmu_node *cache_entry, + size_t start, + size_t from_this_cache_entry) +{ + struct hfi1_user_sdma_pkt_q *pq = req->pq; + unsigned int page_offset; + unsigned int from_this_page; + size_t page_index; + void *ctx; + int ret; + + /* + * Because the cache may be more fragmented than the memory that is being accessed, + * it's not strictly necessary to have a descriptor per cache entry. 
+ */ + + while (from_this_cache_entry) { + page_index = PFN_DOWN(start - cache_entry->rb.addr); + + if (page_index >= cache_entry->npages) { + SDMA_DBG(req, + "Request for page_index %zu >= cache_entry->npages %u", + page_index, cache_entry->npages); + return -EINVAL; + } + + page_offset = start - ALIGN_DOWN(start, PAGE_SIZE); + from_this_page = PAGE_SIZE - page_offset; + + if (from_this_page < from_this_cache_entry) { + ctx = NULL; + } else { + /* + * In the case they are equal the next line has no practical effect, + * but it's better to do a register to register copy than a conditional + * branch. + */ + from_this_page = from_this_cache_entry; + ctx = cache_entry; + } + + ret = sdma_txadd_page(pq->dd, &tx->txreq, + cache_entry->pages[page_index], + page_offset, from_this_page, + ctx, + sdma_mmu_rb_node_get, + sdma_mmu_rb_node_put); + if (ret) { + /* + * When there's a failure, the entire request is freed by + * user_sdma_send_pkts(). + */ + SDMA_DBG(req, + "sdma_txadd_page failed %d page_index %lu page_offset %u from_this_page %u", + ret, page_index, page_offset, from_this_page); + return ret; + } + start += from_this_page; + from_this_cache_entry -= from_this_page; + } + return 0; +} + +static int add_system_iovec_to_sdma_packet(struct user_sdma_request *req, + struct user_sdma_txreq *tx, + struct user_sdma_iovec *iovec, + size_t from_this_iovec) +{ + while (from_this_iovec > 0) { + struct sdma_mmu_node *cache_entry; + size_t from_this_cache_entry; + size_t start; + int ret; + + start = (uintptr_t)iovec->iov.iov_base + iovec->offset; + ret = get_system_cache_entry(req, &cache_entry, start, + from_this_iovec); + if (ret) { + SDMA_DBG(req, "pin system segment failed %d", ret); + return ret; + } + + from_this_cache_entry = cache_entry->rb.len - (start - cache_entry->rb.addr); + if (from_this_cache_entry > from_this_iovec) + from_this_cache_entry = from_this_iovec; + + ret = add_mapping_to_sdma_packet(req, tx, cache_entry, start, + from_this_cache_entry); + + /* + * Done adding cache_entry to zero or more sdma_desc. Can + * kref_put() the "safety" kref taken under + * get_system_cache_entry(). + */ + kref_put(&cache_entry->rb.refcount, hfi1_mmu_rb_release); + + if (ret) { + SDMA_DBG(req, "add system segment failed %d", ret); + return ret; + } + + iovec->offset += from_this_cache_entry; + from_this_iovec -= from_this_cache_entry; + } + + return 0; +} + +/* + * Add up to pkt_data_remaining bytes to the txreq, starting at the current + * offset in the given iovec entry and continuing until all data has been added + * to the iovec or the iovec entry type changes. + * + * On success, prior to returning, adjust pkt_data_remaining, req->iov_idx, and + * the offset value in req->iov[req->iov_idx] to reflect the data that has been + * consumed. + */ +int hfi1_add_pages_to_sdma_packet(struct user_sdma_request *req, + struct user_sdma_txreq *tx, + struct user_sdma_iovec *iovec, + u32 *pkt_data_remaining) +{ + size_t remaining_to_add = *pkt_data_remaining; + /* + * Walk through iovec entries, ensure the associated pages + * are pinned and mapped, add data to the packet until no more + * data remains to be added or the iovec entry type changes. + */ + while (remaining_to_add > 0) { + struct user_sdma_iovec *cur_iovec; + size_t from_this_iovec; + int ret; + + cur_iovec = iovec; + from_this_iovec = iovec->iov.iov_len - iovec->offset; + + if (from_this_iovec > remaining_to_add) { + from_this_iovec = remaining_to_add; + } else { + /* The current iovec entry will be consumed by this pass. 
*/ + req->iov_idx++; + iovec++; + } + + ret = add_system_iovec_to_sdma_packet(req, tx, cur_iovec, + from_this_iovec); + if (ret) + return ret; + + remaining_to_add -= from_this_iovec; + } + *pkt_data_remaining = remaining_to_add; + + return 0; +} + +static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, + unsigned long len) +{ + return (bool)(node->addr == addr); +} + +/* + * Return 1 to remove the node from the rb tree and call the remove op. + * + * Called with the rb tree lock held. + */ +static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, + void *evict_arg, bool *stop) +{ + struct sdma_mmu_node *node = + container_of(mnode, struct sdma_mmu_node, rb); + struct evict_data *evict_data = evict_arg; + + /* this node will be evicted, add its pages to our count */ + evict_data->cleared += node->npages; + + /* have enough pages been cleared? */ + if (evict_data->cleared >= evict_data->target) + *stop = true; + + return 1; /* remove this node */ +} + +static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode) +{ + struct sdma_mmu_node *node = + container_of(mnode, struct sdma_mmu_node, rb); + + free_system_node(node); +} diff --git a/drivers/infiniband/hw/hfi1/pinning.h b/drivers/infiniband/hw/hfi1/pinning.h new file mode 100644 index 000000000000..a814a3aa9654 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/pinning.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ +/* + * Copyright(c) 2023 Cornelis Networks, Inc. + */ +#ifndef _HFI1_PINNING_H +#define _HFI1_PINNING_H + +struct hfi1_user_sdma_pkt_q; +struct user_sdma_request; +struct user_sdma_txreq; +struct user_sdma_iovec; + +int hfi1_init_system_pinning(struct hfi1_user_sdma_pkt_q *pq); +void hfi1_free_system_pinning(struct hfi1_user_sdma_pkt_q *pq); +int hfi1_add_pages_to_sdma_packet(struct user_sdma_request *req, + struct user_sdma_txreq *tx, + struct user_sdma_iovec *iovec, + u32 *pkt_data_remaining); + +#endif /* _HFI1_PINNING_H */ diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 02bd62b857b7..29ae7beb9b03 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* - * Copyright(c) 2020 - Cornelis Networks, Inc. + * Copyright(c) 2020 - 2023 Cornelis Networks, Inc. * Copyright(c) 2015 - 2018 Intel Corporation. 
*/ @@ -60,22 +60,6 @@ static int defer_packet_queue( uint seq, bool pkts_sent); static void activate_packet_queue(struct iowait *wait, int reason); -static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, - unsigned long len); -static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, - void *arg2, bool *stop); -static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode); - -static struct mmu_rb_ops sdma_rb_ops = { - .filter = sdma_rb_filter, - .evict = sdma_rb_evict, - .remove = sdma_rb_remove, -}; - -static int add_system_pages_to_sdma_packet(struct user_sdma_request *req, - struct user_sdma_txreq *tx, - struct user_sdma_iovec *iovec, - u32 *pkt_remaining); static int defer_packet_queue( struct sdma_engine *sde, @@ -185,12 +169,9 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, cq->nentries = hfi1_sdma_comp_ring_size; - ret = hfi1_mmu_rb_register(pq, &sdma_rb_ops, dd->pport->hfi1_wq, - &pq->handler); - if (ret) { - dd_dev_err(dd, "Failed to register with MMU %d", ret); + ret = hfi1_init_system_pinning(pq); + if (ret) goto pq_mmu_fail; - } rcu_assign_pointer(fd->pq, pq); fd->cq = cq; @@ -249,8 +230,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, pq->wait, !atomic_read(&pq->n_reqs)); kfree(pq->reqs); - if (pq->handler) - hfi1_mmu_rb_unregister(pq->handler); + hfi1_free_system_pinning(pq); bitmap_free(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); flush_pq_iowait(pq); @@ -821,8 +801,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts) req->tidoffset += datalen; req->sent += datalen; while (datalen) { - ret = add_system_pages_to_sdma_packet(req, tx, iovec, - &datalen); + ret = hfi1_add_pages_to_sdma_packet(req, tx, iovec, + &datalen); if (ret) goto free_txreq; iovec = &req->iovs[req->iov_idx]; @@ -860,17 +840,6 @@ free_tx: return ret; } -static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages) -{ - struct evict_data evict_data; - struct mmu_rb_handler *handler = pq->handler; - - evict_data.cleared = 0; - evict_data.target = npages; - hfi1_mmu_rb_evict(handler, &evict_data); - return evict_data.cleared; -} - static int check_header_template(struct user_sdma_request *req, struct hfi1_pkt_header *hdr, u32 lrhlen, u32 datalen) @@ -1253,401 +1222,3 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq, trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt, idx, state, ret); } - -static void unpin_vector_pages(struct mm_struct *mm, struct page **pages, - unsigned int start, unsigned int npages) -{ - hfi1_release_user_pages(mm, pages + start, npages, false); - kfree(pages); -} - -static void free_system_node(struct sdma_mmu_node *node) -{ - if (node->npages) { - unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0, - node->npages); - atomic_sub(node->npages, &node->pq->n_locked); - } - kfree(node); -} - -/* - * kref_get()'s an additional kref on the returned rb_node to prevent rb_node - * from being released until after rb_node is assigned to an SDMA descriptor - * (struct sdma_desc) under add_system_iovec_to_sdma_packet(), even if the - * virtual address range for rb_node is invalidated between now and then. 
- */ -static struct sdma_mmu_node *find_system_node(struct mmu_rb_handler *handler, - unsigned long start, - unsigned long end) -{ - struct mmu_rb_node *rb_node; - unsigned long flags; - - spin_lock_irqsave(&handler->lock, flags); - rb_node = hfi1_mmu_rb_get_first(handler, start, (end - start)); - if (!rb_node) { - spin_unlock_irqrestore(&handler->lock, flags); - return NULL; - } - - /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */ - kref_get(&rb_node->refcount); - spin_unlock_irqrestore(&handler->lock, flags); - - return container_of(rb_node, struct sdma_mmu_node, rb); -} - -static int pin_system_pages(struct user_sdma_request *req, - uintptr_t start_address, size_t length, - struct sdma_mmu_node *node, int npages) -{ - struct hfi1_user_sdma_pkt_q *pq = req->pq; - int pinned, cleared; - struct page **pages; - - pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); - if (!pages) - return -ENOMEM; - -retry: - if (!hfi1_can_pin_pages(pq->dd, current->mm, atomic_read(&pq->n_locked), - npages)) { - SDMA_DBG(req, "Evicting: nlocked %u npages %u", - atomic_read(&pq->n_locked), npages); - cleared = sdma_cache_evict(pq, npages); - if (cleared >= npages) - goto retry; - } - - SDMA_DBG(req, "Acquire user pages start_address %lx node->npages %u npages %u", - start_address, node->npages, npages); - pinned = hfi1_acquire_user_pages(current->mm, start_address, npages, 0, - pages); - - if (pinned < 0) { - kfree(pages); - SDMA_DBG(req, "pinned %d", pinned); - return pinned; - } - if (pinned != npages) { - unpin_vector_pages(current->mm, pages, node->npages, pinned); - SDMA_DBG(req, "npages %u pinned %d", npages, pinned); - return -EFAULT; - } - node->rb.addr = start_address; - node->rb.len = length; - node->pages = pages; - node->npages = npages; - atomic_add(pinned, &pq->n_locked); - SDMA_DBG(req, "done. pinned %d", pinned); - return 0; -} - -/* - * kref refcount on *node_p will be 2 on successful addition: one kref from - * kref_init() for mmu_rb_handler and one kref to prevent *node_p from being - * released until after *node_p is assigned to an SDMA descriptor (struct - * sdma_desc) under add_system_iovec_to_sdma_packet(), even if the virtual - * address range for *node_p is invalidated between now and then. 
- */ -static int add_system_pinning(struct user_sdma_request *req, - struct sdma_mmu_node **node_p, - unsigned long start, unsigned long len) - -{ - struct hfi1_user_sdma_pkt_q *pq = req->pq; - struct sdma_mmu_node *node; - int ret; - - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (!node) - return -ENOMEM; - - /* First kref "moves" to mmu_rb_handler */ - kref_init(&node->rb.refcount); - - /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */ - kref_get(&node->rb.refcount); - - node->pq = pq; - ret = pin_system_pages(req, start, len, node, PFN_DOWN(len)); - if (ret == 0) { - ret = hfi1_mmu_rb_insert(pq->handler, &node->rb); - if (ret) - free_system_node(node); - else - *node_p = node; - - return ret; - } - - kfree(node); - return ret; -} - -static int get_system_cache_entry(struct user_sdma_request *req, - struct sdma_mmu_node **node_p, - size_t req_start, size_t req_len) -{ - struct hfi1_user_sdma_pkt_q *pq = req->pq; - u64 start = ALIGN_DOWN(req_start, PAGE_SIZE); - u64 end = PFN_ALIGN(req_start + req_len); - struct mmu_rb_handler *handler = pq->handler; - int ret; - - if ((end - start) == 0) { - SDMA_DBG(req, - "Request for empty cache entry req_start %lx req_len %lx start %llx end %llx", - req_start, req_len, start, end); - return -EINVAL; - } - - SDMA_DBG(req, "req_start %lx req_len %lu", req_start, req_len); - - while (1) { - struct sdma_mmu_node *node = - find_system_node(handler, start, end); - u64 prepend_len = 0; - - SDMA_DBG(req, "node %p start %llx end %llu", node, start, end); - if (!node) { - ret = add_system_pinning(req, node_p, start, - end - start); - if (ret == -EEXIST) { - /* - * Another execution context has inserted a - * conficting entry first. - */ - continue; - } - return ret; - } - - if (node->rb.addr <= start) { - /* - * This entry covers at least part of the region. If it doesn't extend - * to the end, then this will be called again for the next segment. - */ - *node_p = node; - return 0; - } - - SDMA_DBG(req, "prepend: node->rb.addr %lx, node->rb.refcount %d", - node->rb.addr, kref_read(&node->rb.refcount)); - prepend_len = node->rb.addr - start; - - /* - * This node will not be returned, instead a new node - * will be. So release the reference. - */ - kref_put(&node->rb.refcount, hfi1_mmu_rb_release); - - /* Prepend a node to cover the beginning of the allocation */ - ret = add_system_pinning(req, node_p, start, prepend_len); - if (ret == -EEXIST) { - /* Another execution context has inserted a conficting entry first. */ - continue; - } - return ret; - } -} - -static void sdma_mmu_rb_node_get(void *ctx) -{ - struct mmu_rb_node *node = ctx; - - kref_get(&node->refcount); -} - -static void sdma_mmu_rb_node_put(void *ctx) -{ - struct sdma_mmu_node *node = ctx; - - kref_put(&node->rb.refcount, hfi1_mmu_rb_release); -} - -static int add_mapping_to_sdma_packet(struct user_sdma_request *req, - struct user_sdma_txreq *tx, - struct sdma_mmu_node *cache_entry, - size_t start, - size_t from_this_cache_entry) -{ - struct hfi1_user_sdma_pkt_q *pq = req->pq; - unsigned int page_offset; - unsigned int from_this_page; - size_t page_index; - void *ctx; - int ret; - - /* - * Because the cache may be more fragmented than the memory that is being accessed, - * it's not strictly necessary to have a descriptor per cache entry. 
- */ - - while (from_this_cache_entry) { - page_index = PFN_DOWN(start - cache_entry->rb.addr); - - if (page_index >= cache_entry->npages) { - SDMA_DBG(req, - "Request for page_index %zu >= cache_entry->npages %u", - page_index, cache_entry->npages); - return -EINVAL; - } - - page_offset = start - ALIGN_DOWN(start, PAGE_SIZE); - from_this_page = PAGE_SIZE - page_offset; - - if (from_this_page < from_this_cache_entry) { - ctx = NULL; - } else { - /* - * In the case they are equal the next line has no practical effect, - * but it's better to do a register to register copy than a conditional - * branch. - */ - from_this_page = from_this_cache_entry; - ctx = cache_entry; - } - - ret = sdma_txadd_page(pq->dd, &tx->txreq, - cache_entry->pages[page_index], - page_offset, from_this_page, - ctx, - sdma_mmu_rb_node_get, - sdma_mmu_rb_node_put); - if (ret) { - /* - * When there's a failure, the entire request is freed by - * user_sdma_send_pkts(). - */ - SDMA_DBG(req, - "sdma_txadd_page failed %d page_index %lu page_offset %u from_this_page %u", - ret, page_index, page_offset, from_this_page); - return ret; - } - start += from_this_page; - from_this_cache_entry -= from_this_page; - } - return 0; -} - -static int add_system_iovec_to_sdma_packet(struct user_sdma_request *req, - struct user_sdma_txreq *tx, - struct user_sdma_iovec *iovec, - size_t from_this_iovec) -{ - while (from_this_iovec > 0) { - struct sdma_mmu_node *cache_entry; - size_t from_this_cache_entry; - size_t start; - int ret; - - start = (uintptr_t)iovec->iov.iov_base + iovec->offset; - ret = get_system_cache_entry(req, &cache_entry, start, - from_this_iovec); - if (ret) { - SDMA_DBG(req, "pin system segment failed %d", ret); - return ret; - } - - from_this_cache_entry = cache_entry->rb.len - (start - cache_entry->rb.addr); - if (from_this_cache_entry > from_this_iovec) - from_this_cache_entry = from_this_iovec; - - ret = add_mapping_to_sdma_packet(req, tx, cache_entry, start, - from_this_cache_entry); - - /* - * Done adding cache_entry to zero or more sdma_desc. Can - * kref_put() the "safety" kref taken under - * get_system_cache_entry(). - */ - kref_put(&cache_entry->rb.refcount, hfi1_mmu_rb_release); - - if (ret) { - SDMA_DBG(req, "add system segment failed %d", ret); - return ret; - } - - iovec->offset += from_this_cache_entry; - from_this_iovec -= from_this_cache_entry; - } - - return 0; -} - -static int add_system_pages_to_sdma_packet(struct user_sdma_request *req, - struct user_sdma_txreq *tx, - struct user_sdma_iovec *iovec, - u32 *pkt_data_remaining) -{ - size_t remaining_to_add = *pkt_data_remaining; - /* - * Walk through iovec entries, ensure the associated pages - * are pinned and mapped, add data to the packet until no more - * data remains to be added. - */ - while (remaining_to_add > 0) { - struct user_sdma_iovec *cur_iovec; - size_t from_this_iovec; - int ret; - - cur_iovec = iovec; - from_this_iovec = iovec->iov.iov_len - iovec->offset; - - if (from_this_iovec > remaining_to_add) { - from_this_iovec = remaining_to_add; - } else { - /* The current iovec entry will be consumed by this pass. 
*/
-			req->iov_idx++;
-			iovec++;
-		}
-
-		ret = add_system_iovec_to_sdma_packet(req, tx, cur_iovec,
-						      from_this_iovec);
-		if (ret)
-			return ret;
-
-		remaining_to_add -= from_this_iovec;
-	}
-	*pkt_data_remaining = remaining_to_add;
-
-	return 0;
-}
-
-static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
-			   unsigned long len)
-{
-	return (bool)(node->addr == addr);
-}
-
-/*
- * Return 1 to remove the node from the rb tree and call the remove op.
- *
- * Called with the rb tree lock held.
- */
-static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
-			 void *evict_arg, bool *stop)
-{
-	struct sdma_mmu_node *node =
-		container_of(mnode, struct sdma_mmu_node, rb);
-	struct evict_data *evict_data = evict_arg;
-
-	/* this node will be evicted, add its pages to our count */
-	evict_data->cleared += node->npages;
-
-	/* have enough pages been cleared? */
-	if (evict_data->cleared >= evict_data->target)
-		*stop = true;
-
-	return 1; /* remove this node */
-}
-
-static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode)
-{
-	struct sdma_mmu_node *node =
-		container_of(mnode, struct sdma_mmu_node, rb);
-
-	free_system_node(node);
-}
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index 548347d4c5bc..742ec1470cc5 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
- * Copyright(c) 2020 - Cornelis Networks, Inc.
+ * Copyright(c) 2023 - Cornelis Networks, Inc.
  * Copyright(c) 2015 - 2018 Intel Corporation.
  */
 #ifndef _HFI1_USER_SDMA_H
@@ -13,6 +13,8 @@
 #include "iowait.h"
 #include "user_exp_rcv.h"
 #include "mmu_rb.h"
+#include "pinning.h"
+#include "sdma.h"
 
 /* The maximum number of Data io vectors per message/request */
 #define MAX_VECTORS_PER_REQ 8
@@ -101,13 +103,6 @@ struct hfi1_user_sdma_comp_q {
 	struct hfi1_sdma_comp_entry *comps;
 };
 
-struct sdma_mmu_node {
-	struct mmu_rb_node rb;
-	struct hfi1_user_sdma_pkt_q *pq;
-	struct page **pages;
-	unsigned int npages;
-};
-
 struct user_sdma_iovec {
 	struct list_head list;
 	struct iovec iov;
@@ -203,10 +198,4 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
 int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 				   struct iovec *iovec, unsigned long dim,
 				   unsigned long *count);
-
-static inline struct mm_struct *mm_from_sdma_node(struct sdma_mmu_node *node)
-{
-	return node->rb.handler->mn.mm;
-}
-
 #endif /* _HFI1_USER_SDMA_H */
-- cgit
From f5acc36b0714b7b8510a8b436087d33a65cb05f4 Mon Sep 17 00:00:00 2001 From: Douglas Miller Date: Tue, 22 Aug 2023 10:07:58 -0400 Subject: IB/hfi1: Reduce printing of errors during driver shut down

The driver prints unnecessary error messages for error conditions during shutdown; remove them to quiet it down.
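The quieting is done by checking the device's shutdown flag before emitting the error; a condensed sketch of the pattern applied in the hunks below (not the complete functions):

	ret = write_lcb_csr(dd, csr, data);
	if (ret) {
		/* suppress the noise if the device is already being torn down */
		if (!(dd->flags & HFI1_SHUTDOWN))
			dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr);
		return 0;
	}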
Signed-off-by: Douglas Miller Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/169271327832.1855761.3756156924805531643.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/chip.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9dbb89e9f4af..e954af824f37 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1461,7 +1461,8 @@ static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context, ret = write_lcb_csr(dd, csr, data); if (ret) { - dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr); + if (!(dd->flags & HFI1_SHUTDOWN)) + dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr); return 0; } @@ -6160,7 +6161,7 @@ static int request_host_lcb_access(struct hfi1_devdata *dd) ret = do_8051_command(dd, HCMD_MISC, (u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT, NULL); - if (ret != HCMD_SUCCESS) { + if (ret != HCMD_SUCCESS && !(dd->flags & HFI1_SHUTDOWN)) { dd_dev_err(dd, "%s: command failed with error %d\n", __func__, ret); } @@ -6241,7 +6242,8 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok) if (dd->lcb_access_count == 0) { ret = request_host_lcb_access(dd); if (ret) { - dd_dev_err(dd, + if (!(dd->flags & HFI1_SHUTDOWN)) + dd_dev_err(dd, "%s: unable to acquire LCB access, err %d\n", __func__, ret); goto done; -- cgit