From dcd3f985b20ffcc375f82ca0ca9f241c7025eb5e Mon Sep 17 00:00:00 2001 From: Junji Wei Date: Tue, 31 Aug 2021 16:32:23 +0800 Subject: RDMA/rxe: Fix wrong port_cap_flags The port->attr.port_cap_flags should be set to enum ib_port_capability_mask_bits in ib_mad.h, not RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20210831083223.65797-1-weijunji@bytedance.com Signed-off-by: Junji Wei Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_param.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index 742e6ec93686..b5a70cbe94aa 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -113,7 +113,7 @@ enum rxe_device_param { /* default/initial rxe port parameters */ enum rxe_port_param { RXE_PORT_GID_TBL_LEN = 1024, - RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP, + RXE_PORT_PORT_CAP_FLAGS = IB_PORT_CM_SUP, RXE_PORT_MAX_MSG_SZ = 0x800000, RXE_PORT_BAD_PKEY_CNTR = 0, RXE_PORT_QKEY_VIOL_CNTR = 0, -- cgit From d12faf2dee50b4171b18e67a6e30d7e145f66c56 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 14 Sep 2021 21:24:56 -0400 Subject: RDMA/rxe: remove the redundant variable The variable port is not necessary. So remove it. Link: https://lore.kernel.org/r/20210915012456.476728-1-yanjun.zhu@intel.com Signed-off-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_verbs.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 267b5a9c345d..586ef102986e 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -29,13 +29,10 @@ static int rxe_query_port(struct ib_device *dev, u32 port_num, struct ib_port_attr *attr) { struct rxe_dev *rxe = to_rdev(dev); - struct rxe_port *port; int rc; - port = &rxe->port; - /* *attr being zeroed by the caller, avoid zeroing it here */ - *attr = port->attr; + *attr = rxe->port.attr; mutex_lock(&rxe->usdev_lock); rc = ib_get_eth_speed(dev, port_num, &attr->active_speed, -- cgit From ad17bbef3dd573da937816edc0ab84fed6a17fa6 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Wed, 15 Sep 2021 03:51:28 -0400 Subject: RDMA/rxe: remove the unnecessary variable In the struct rxe_qp, the variable send_pkts is never used. So remove it. Link: https://lore.kernel.org/r/20210915075128.482919-1-yanjun.zhu@intel.com Signed-off-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 2 -- drivers/infiniband/sw/rxe/rxe_verbs.h | 1 - 2 files changed, 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 1ab6af7ddb25..fa97ce9eaea3 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -190,8 +190,6 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, INIT_LIST_HEAD(&qp->grp_list); - skb_queue_head_init(&qp->send_pkts); - spin_lock_init(&qp->grp_lock); spin_lock_init(&qp->state_lock); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index ac2a2148027f..1fd53fb3a4b3 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -240,7 +240,6 @@ struct rxe_qp { struct sk_buff_head req_pkts; struct sk_buff_head resp_pkts; - struct sk_buff_head send_pkts; struct rxe_req_info req; struct rxe_comp_info comp; -- cgit From 9a381f7e5aa299de3500b8afa2237e5d1eab63fb Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 15 Sep 2021 05:32:32 -0700 Subject: RDMA/bnxt_re: Add extended statistics counters Implement extended statistics counters for newer adapters. Check if the FW support for this command and issue the FW command only if is supported. Includes code re-organization to handle extended stats. Also, add AH and PD software counters. Link: https://lore.kernel.org/r/1631709163-2287-2-git-send-email-selvin.xavier@broadcom.com Reviewed-by: Leon Romanovsky Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 5 +- drivers/infiniband/hw/bnxt_re/hw_counters.c | 273 ++++++++++++++++++---------- drivers/infiniband/hw/bnxt_re/hw_counters.h | 28 ++- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 16 +- drivers/infiniband/hw/bnxt_re/main.c | 4 + drivers/infiniband/hw/bnxt_re/qplib_fp.c | 3 + drivers/infiniband/hw/bnxt_re/qplib_res.h | 9 +- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 51 ++++++ drivers/infiniband/hw/bnxt_re/qplib_sp.h | 28 +++ drivers/infiniband/hw/bnxt_re/roce_hsi.h | 85 +++++++++ 10 files changed, 401 insertions(+), 101 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index ba26d8e6a9c2..5b8562000778 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -39,6 +39,7 @@ #ifndef __BNXT_RE_H__ #define __BNXT_RE_H__ +#include "hw_counters.h" #define ROCE_DRV_MODULE_NAME "bnxt_re" #define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver" @@ -177,15 +178,17 @@ struct bnxt_re_dev { atomic_t srq_count; atomic_t mr_count; atomic_t mw_count; + atomic_t ah_count; + atomic_t pd_count; /* Max of 2 lossless traffic class supported per port */ u16 cosq[2]; /* QP for for handling QP1 packets */ struct bnxt_re_gsi_context gsi_ctx; + struct bnxt_re_stats stats; atomic_t nq_alloc_cnt; u32 is_virtfn; u32 num_vfs; - struct bnxt_qplib_roce_stats stats; }; #define to_bnxt_re_dev(ptr, member) \ diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 7ba07797845c..7e6dcf3dd4bb 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -58,6 +58,8 @@ #include "hw_counters.h" static const char * const bnxt_re_stat_name[] = { + [BNXT_RE_ACTIVE_PD] = "active_pds", + [BNXT_RE_ACTIVE_AH] = "active_ahs", [BNXT_RE_ACTIVE_QP] = "active_qps", [BNXT_RE_ACTIVE_SRQ] = "active_srqs", [BNXT_RE_ACTIVE_CQ] = "active_cqs", @@ -109,17 +111,154 @@ static const char * const bnxt_re_stat_name[] = { [BNXT_RE_RES_SRQ_LOAD_ERR] = "res_srq_load_err", [BNXT_RE_RES_TX_PCI_ERR] = "res_tx_pci_err", [BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err", - [BNXT_RE_OUT_OF_SEQ_ERR] = "oos_drop_count" + [BNXT_RE_OUT_OF_SEQ_ERR] = "oos_drop_count", + [BNXT_RE_TX_ATOMIC_REQ] = "tx_atomic_req", + [BNXT_RE_TX_READ_REQ] = "tx_read_req", + [BNXT_RE_TX_READ_RES] = "tx_read_resp", + [BNXT_RE_TX_WRITE_REQ] = "tx_write_req", + [BNXT_RE_TX_SEND_REQ] = "tx_send_req", + [BNXT_RE_RX_ATOMIC_REQ] = "rx_atomic_req", + [BNXT_RE_RX_READ_REQ] = "rx_read_req", + [BNXT_RE_RX_READ_RESP] = "rx_read_resp", + [BNXT_RE_RX_WRITE_REQ] = "rx_write_req", + [BNXT_RE_RX_SEND_REQ] = "rx_send_req", + [BNXT_RE_RX_ROCE_GOOD_PKTS] = "rx_roce_good_pkts", + [BNXT_RE_RX_ROCE_GOOD_BYTES] = "rx_roce_good_bytes", + [BNXT_RE_OOB] = "rx_out_of_buffer" }; +static void bnxt_re_copy_ext_stats(struct bnxt_re_dev *rdev, + struct rdma_hw_stats *stats, + struct bnxt_qplib_ext_stat *s) +{ + stats->value[BNXT_RE_TX_ATOMIC_REQ] = s->tx_atomic_req; + stats->value[BNXT_RE_TX_READ_REQ] = s->tx_read_req; + stats->value[BNXT_RE_TX_READ_RES] = s->tx_read_res; + stats->value[BNXT_RE_TX_WRITE_REQ] = s->tx_write_req; + stats->value[BNXT_RE_TX_SEND_REQ] = s->tx_send_req; + stats->value[BNXT_RE_RX_ATOMIC_REQ] = s->rx_atomic_req; + stats->value[BNXT_RE_RX_READ_REQ] = s->rx_read_req; + stats->value[BNXT_RE_RX_READ_RESP] = s->rx_read_res; + stats->value[BNXT_RE_RX_WRITE_REQ] = s->rx_write_req; + stats->value[BNXT_RE_RX_SEND_REQ] = s->rx_send_req; + stats->value[BNXT_RE_RX_ROCE_GOOD_PKTS] = s->rx_roce_good_pkts; + stats->value[BNXT_RE_RX_ROCE_GOOD_BYTES] = s->rx_roce_good_bytes; + stats->value[BNXT_RE_OOB] = s->rx_out_of_buffer; +} + +static int bnxt_re_get_ext_stat(struct bnxt_re_dev *rdev, + struct rdma_hw_stats *stats) +{ + struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat; + u32 fid; + int rc; + + fid = PCI_FUNC(rdev->en_dev->pdev->devfn); + rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat); + if (rc) + goto done; + bnxt_re_copy_ext_stats(rdev, stats, estat); + +done: + return rc; +} + +static void bnxt_re_copy_err_stats(struct bnxt_re_dev *rdev, + struct rdma_hw_stats *stats, + struct bnxt_qplib_roce_stats *err_s) +{ + stats->value[BNXT_RE_TO_RETRANSMITS] = + err_s->to_retransmits; + stats->value[BNXT_RE_SEQ_ERR_NAKS_RCVD] = + err_s->seq_err_naks_rcvd; + stats->value[BNXT_RE_MAX_RETRY_EXCEEDED] = + err_s->max_retry_exceeded; + stats->value[BNXT_RE_RNR_NAKS_RCVD] = + err_s->rnr_naks_rcvd; + stats->value[BNXT_RE_MISSING_RESP] = + err_s->missing_resp; + stats->value[BNXT_RE_UNRECOVERABLE_ERR] = + err_s->unrecoverable_err; + stats->value[BNXT_RE_BAD_RESP_ERR] = + err_s->bad_resp_err; + stats->value[BNXT_RE_LOCAL_QP_OP_ERR] = + err_s->local_qp_op_err; + stats->value[BNXT_RE_LOCAL_PROTECTION_ERR] = + err_s->local_protection_err; + stats->value[BNXT_RE_MEM_MGMT_OP_ERR] = + err_s->mem_mgmt_op_err; + stats->value[BNXT_RE_REMOTE_INVALID_REQ_ERR] = + err_s->remote_invalid_req_err; + stats->value[BNXT_RE_REMOTE_ACCESS_ERR] = + err_s->remote_access_err; + stats->value[BNXT_RE_REMOTE_OP_ERR] = + err_s->remote_op_err; + stats->value[BNXT_RE_DUP_REQ] = + err_s->dup_req; + stats->value[BNXT_RE_RES_EXCEED_MAX] = + err_s->res_exceed_max; + stats->value[BNXT_RE_RES_LENGTH_MISMATCH] = + err_s->res_length_mismatch; + stats->value[BNXT_RE_RES_EXCEEDS_WQE] = + err_s->res_exceeds_wqe; + stats->value[BNXT_RE_RES_OPCODE_ERR] = + err_s->res_opcode_err; + stats->value[BNXT_RE_RES_RX_INVALID_RKEY] = + err_s->res_rx_invalid_rkey; + stats->value[BNXT_RE_RES_RX_DOMAIN_ERR] = + err_s->res_rx_domain_err; + stats->value[BNXT_RE_RES_RX_NO_PERM] = + err_s->res_rx_no_perm; + stats->value[BNXT_RE_RES_RX_RANGE_ERR] = + err_s->res_rx_range_err; + stats->value[BNXT_RE_RES_TX_INVALID_RKEY] = + err_s->res_tx_invalid_rkey; + stats->value[BNXT_RE_RES_TX_DOMAIN_ERR] = + err_s->res_tx_domain_err; + stats->value[BNXT_RE_RES_TX_NO_PERM] = + err_s->res_tx_no_perm; + stats->value[BNXT_RE_RES_TX_RANGE_ERR] = + err_s->res_tx_range_err; + stats->value[BNXT_RE_RES_IRRQ_OFLOW] = + err_s->res_irrq_oflow; + stats->value[BNXT_RE_RES_UNSUP_OPCODE] = + err_s->res_unsup_opcode; + stats->value[BNXT_RE_RES_UNALIGNED_ATOMIC] = + err_s->res_unaligned_atomic; + stats->value[BNXT_RE_RES_REM_INV_ERR] = + err_s->res_rem_inv_err; + stats->value[BNXT_RE_RES_MEM_ERROR] = + err_s->res_mem_error; + stats->value[BNXT_RE_RES_SRQ_ERR] = + err_s->res_srq_err; + stats->value[BNXT_RE_RES_CMP_ERR] = + err_s->res_cmp_err; + stats->value[BNXT_RE_RES_INVALID_DUP_RKEY] = + err_s->res_invalid_dup_rkey; + stats->value[BNXT_RE_RES_WQE_FORMAT_ERR] = + err_s->res_wqe_format_err; + stats->value[BNXT_RE_RES_CQ_LOAD_ERR] = + err_s->res_cq_load_err; + stats->value[BNXT_RE_RES_SRQ_LOAD_ERR] = + err_s->res_srq_load_err; + stats->value[BNXT_RE_RES_TX_PCI_ERR] = + err_s->res_tx_pci_err; + stats->value[BNXT_RE_RES_RX_PCI_ERR] = + err_s->res_rx_pci_err; + stats->value[BNXT_RE_OUT_OF_SEQ_ERR] = + err_s->res_oos_drop_count; +} + int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u32 port, int index) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); - struct ctx_hw_stats *bnxt_re_stats = rdev->qplib_ctx.stats.dma; + struct ctx_hw_stats *hw_stats = NULL; + struct bnxt_qplib_roce_stats *err_s = NULL; int rc = 0; + hw_stats = rdev->qplib_ctx.stats.dma; if (!port || !stats) return -EINVAL; @@ -128,118 +267,62 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, stats->value[BNXT_RE_ACTIVE_CQ] = atomic_read(&rdev->cq_count); stats->value[BNXT_RE_ACTIVE_MR] = atomic_read(&rdev->mr_count); stats->value[BNXT_RE_ACTIVE_MW] = atomic_read(&rdev->mw_count); - if (bnxt_re_stats) { + stats->value[BNXT_RE_ACTIVE_PD] = atomic_read(&rdev->pd_count); + stats->value[BNXT_RE_ACTIVE_AH] = atomic_read(&rdev->ah_count); + + if (hw_stats) { stats->value[BNXT_RE_RECOVERABLE_ERRORS] = - le64_to_cpu(bnxt_re_stats->tx_bcast_pkts); + le64_to_cpu(hw_stats->tx_bcast_pkts); stats->value[BNXT_RE_RX_DROPS] = - le64_to_cpu(bnxt_re_stats->rx_error_pkts); + le64_to_cpu(hw_stats->rx_error_pkts); stats->value[BNXT_RE_RX_DISCARDS] = - le64_to_cpu(bnxt_re_stats->rx_discard_pkts); + le64_to_cpu(hw_stats->rx_discard_pkts); stats->value[BNXT_RE_RX_PKTS] = - le64_to_cpu(bnxt_re_stats->rx_ucast_pkts); + le64_to_cpu(hw_stats->rx_ucast_pkts); stats->value[BNXT_RE_RX_BYTES] = - le64_to_cpu(bnxt_re_stats->rx_ucast_bytes); + le64_to_cpu(hw_stats->rx_ucast_bytes); stats->value[BNXT_RE_TX_PKTS] = - le64_to_cpu(bnxt_re_stats->tx_ucast_pkts); + le64_to_cpu(hw_stats->tx_ucast_pkts); stats->value[BNXT_RE_TX_BYTES] = - le64_to_cpu(bnxt_re_stats->tx_ucast_bytes); + le64_to_cpu(hw_stats->tx_ucast_bytes); } + err_s = &rdev->stats.rstat.errs; if (test_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags)) { - rc = bnxt_qplib_get_roce_stats(&rdev->rcfw, &rdev->stats); - if (rc) + rc = bnxt_qplib_get_roce_stats(&rdev->rcfw, err_s); + if (rc) { clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags); - stats->value[BNXT_RE_TO_RETRANSMITS] = - rdev->stats.to_retransmits; - stats->value[BNXT_RE_SEQ_ERR_NAKS_RCVD] = - rdev->stats.seq_err_naks_rcvd; - stats->value[BNXT_RE_MAX_RETRY_EXCEEDED] = - rdev->stats.max_retry_exceeded; - stats->value[BNXT_RE_RNR_NAKS_RCVD] = - rdev->stats.rnr_naks_rcvd; - stats->value[BNXT_RE_MISSING_RESP] = - rdev->stats.missing_resp; - stats->value[BNXT_RE_UNRECOVERABLE_ERR] = - rdev->stats.unrecoverable_err; - stats->value[BNXT_RE_BAD_RESP_ERR] = - rdev->stats.bad_resp_err; - stats->value[BNXT_RE_LOCAL_QP_OP_ERR] = - rdev->stats.local_qp_op_err; - stats->value[BNXT_RE_LOCAL_PROTECTION_ERR] = - rdev->stats.local_protection_err; - stats->value[BNXT_RE_MEM_MGMT_OP_ERR] = - rdev->stats.mem_mgmt_op_err; - stats->value[BNXT_RE_REMOTE_INVALID_REQ_ERR] = - rdev->stats.remote_invalid_req_err; - stats->value[BNXT_RE_REMOTE_ACCESS_ERR] = - rdev->stats.remote_access_err; - stats->value[BNXT_RE_REMOTE_OP_ERR] = - rdev->stats.remote_op_err; - stats->value[BNXT_RE_DUP_REQ] = - rdev->stats.dup_req; - stats->value[BNXT_RE_RES_EXCEED_MAX] = - rdev->stats.res_exceed_max; - stats->value[BNXT_RE_RES_LENGTH_MISMATCH] = - rdev->stats.res_length_mismatch; - stats->value[BNXT_RE_RES_EXCEEDS_WQE] = - rdev->stats.res_exceeds_wqe; - stats->value[BNXT_RE_RES_OPCODE_ERR] = - rdev->stats.res_opcode_err; - stats->value[BNXT_RE_RES_RX_INVALID_RKEY] = - rdev->stats.res_rx_invalid_rkey; - stats->value[BNXT_RE_RES_RX_DOMAIN_ERR] = - rdev->stats.res_rx_domain_err; - stats->value[BNXT_RE_RES_RX_NO_PERM] = - rdev->stats.res_rx_no_perm; - stats->value[BNXT_RE_RES_RX_RANGE_ERR] = - rdev->stats.res_rx_range_err; - stats->value[BNXT_RE_RES_TX_INVALID_RKEY] = - rdev->stats.res_tx_invalid_rkey; - stats->value[BNXT_RE_RES_TX_DOMAIN_ERR] = - rdev->stats.res_tx_domain_err; - stats->value[BNXT_RE_RES_TX_NO_PERM] = - rdev->stats.res_tx_no_perm; - stats->value[BNXT_RE_RES_TX_RANGE_ERR] = - rdev->stats.res_tx_range_err; - stats->value[BNXT_RE_RES_IRRQ_OFLOW] = - rdev->stats.res_irrq_oflow; - stats->value[BNXT_RE_RES_UNSUP_OPCODE] = - rdev->stats.res_unsup_opcode; - stats->value[BNXT_RE_RES_UNALIGNED_ATOMIC] = - rdev->stats.res_unaligned_atomic; - stats->value[BNXT_RE_RES_REM_INV_ERR] = - rdev->stats.res_rem_inv_err; - stats->value[BNXT_RE_RES_MEM_ERROR] = - rdev->stats.res_mem_error; - stats->value[BNXT_RE_RES_SRQ_ERR] = - rdev->stats.res_srq_err; - stats->value[BNXT_RE_RES_CMP_ERR] = - rdev->stats.res_cmp_err; - stats->value[BNXT_RE_RES_INVALID_DUP_RKEY] = - rdev->stats.res_invalid_dup_rkey; - stats->value[BNXT_RE_RES_WQE_FORMAT_ERR] = - rdev->stats.res_wqe_format_err; - stats->value[BNXT_RE_RES_CQ_LOAD_ERR] = - rdev->stats.res_cq_load_err; - stats->value[BNXT_RE_RES_SRQ_LOAD_ERR] = - rdev->stats.res_srq_load_err; - stats->value[BNXT_RE_RES_TX_PCI_ERR] = - rdev->stats.res_tx_pci_err; - stats->value[BNXT_RE_RES_RX_PCI_ERR] = - rdev->stats.res_rx_pci_err; - stats->value[BNXT_RE_OUT_OF_SEQ_ERR] = - rdev->stats.res_oos_drop_count; + goto done; + } + if (_is_ext_stats_supported(rdev->dev_attr.dev_cap_flags) && + !rdev->is_virtfn) { + rc = bnxt_re_get_ext_stat(rdev, stats); + if (rc) { + clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, + &rdev->flags); + goto done; + } + } + bnxt_re_copy_err_stats(rdev, stats, err_s); } - return ARRAY_SIZE(bnxt_re_stat_name); +done: + return bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? + BNXT_RE_NUM_EXT_COUNTERS : BNXT_RE_NUM_STD_COUNTERS; } struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num) { - BUILD_BUG_ON(ARRAY_SIZE(bnxt_re_stat_name) != BNXT_RE_NUM_COUNTERS); + struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); + int num_counters = 0; + + if (bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) + num_counters = BNXT_RE_NUM_EXT_COUNTERS; + else + num_counters = BNXT_RE_NUM_STD_COUNTERS; return rdma_alloc_hw_stats_struct(bnxt_re_stat_name, - ARRAY_SIZE(bnxt_re_stat_name), + num_counters, RDMA_HW_STATS_DEFAULT_LIFESPAN); } diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h index 6f2d2f91d9ff..d65be4c22730 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.h +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h @@ -41,6 +41,8 @@ #define __BNXT_RE_HW_STATS_H__ enum bnxt_re_hw_stats { + BNXT_RE_ACTIVE_PD, + BNXT_RE_ACTIVE_AH, BNXT_RE_ACTIVE_QP, BNXT_RE_ACTIVE_SRQ, BNXT_RE_ACTIVE_CQ, @@ -93,7 +95,31 @@ enum bnxt_re_hw_stats { BNXT_RE_RES_TX_PCI_ERR, BNXT_RE_RES_RX_PCI_ERR, BNXT_RE_OUT_OF_SEQ_ERR, - BNXT_RE_NUM_COUNTERS + BNXT_RE_TX_ATOMIC_REQ, + BNXT_RE_TX_READ_REQ, + BNXT_RE_TX_READ_RES, + BNXT_RE_TX_WRITE_REQ, + BNXT_RE_TX_SEND_REQ, + BNXT_RE_RX_ATOMIC_REQ, + BNXT_RE_RX_READ_REQ, + BNXT_RE_RX_READ_RESP, + BNXT_RE_RX_WRITE_REQ, + BNXT_RE_RX_SEND_REQ, + BNXT_RE_RX_ROCE_GOOD_PKTS, + BNXT_RE_RX_ROCE_GOOD_BYTES, + BNXT_RE_OOB, + BNXT_RE_NUM_EXT_COUNTERS +}; + +#define BNXT_RE_NUM_STD_COUNTERS (BNXT_RE_OUT_OF_SEQ_ERR + 1) + +struct bnxt_re_rstat { + struct bnxt_qplib_roce_stats errs; + struct bnxt_qplib_ext_stat ext_stat; +}; + +struct bnxt_re_stats { + struct bnxt_re_rstat rstat; }; struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 408dfbcc47b5..d09841b8d9e1 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -541,9 +541,12 @@ int bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata) bnxt_re_destroy_fence_mr(pd); - if (pd->qplib_pd.id) - bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, - &pd->qplib_pd); + if (pd->qplib_pd.id) { + if (!bnxt_qplib_dealloc_pd(&rdev->qplib_res, + &rdev->qplib_res.pd_tbl, + &pd->qplib_pd)) + atomic_dec(&rdev->pd_count); + } return 0; } @@ -595,6 +598,8 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) if (bnxt_re_create_fence_mr(pd)) ibdev_warn(&rdev->ibdev, "Failed to create Fence-MR\n"); + atomic_inc(&rdev->pd_count); + return 0; dbfail: bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, @@ -611,6 +616,8 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, !(flags & RDMA_DESTROY_AH_SLEEPABLE)); + atomic_dec(&rdev->ah_count); + return 0; } @@ -695,6 +702,7 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr, wmb(); /* make sure cache is updated. */ spin_unlock_irqrestore(&uctx->sh_lock, flag); } + atomic_inc(&rdev->ah_count); return 0; } @@ -760,6 +768,7 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) bnxt_qplib_destroy_ah(&rdev->qplib_res, &gsi_sah->qplib_ah, true); + atomic_dec(&rdev->ah_count); bnxt_qplib_clean_qp(&qp->qplib_qp); ibdev_dbg(&rdev->ibdev, "Destroy the shadow QP\n"); @@ -1006,6 +1015,7 @@ static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah "Failed to allocate HW AH for Shadow QP"); goto fail; } + atomic_inc(&rdev->ah_count); return ah; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 66268e41b470..9abea36ae1a8 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -127,6 +127,8 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode) rdev->qplib_res.cctx = rdev->chip_ctx; rdev->rcfw.res = &rdev->qplib_res; + rdev->qplib_res.dattr = &rdev->dev_attr; + rdev->qplib_res.is_vf = BNXT_VF(bp); bnxt_re_set_drv_mode(rdev, wqe_mode); if (bnxt_qplib_determine_atomics(en_dev->pdev)) @@ -777,6 +779,8 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev, atomic_set(&rdev->srq_count, 0); atomic_set(&rdev->mr_count, 0); atomic_set(&rdev->mw_count, 0); + atomic_set(&rdev->ah_count, 0); + atomic_set(&rdev->pd_count, 0); rdev->cosq[0] = 0xFFFF; rdev->cosq[1] = 0xFFFF; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index d4d4959c2434..d0895e61df15 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -1049,6 +1049,9 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION; if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED; + if (_is_ext_stats_supported(res->dattr->dev_cap_flags) && !res->is_vf) + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED; + req.qp_flags = cpu_to_le32(qp_flags); /* ORRQ and IRRQ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 91031502e8f5..c39b20236f16 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -253,14 +253,15 @@ struct bnxt_qplib_ctx { struct bnxt_qplib_res { struct pci_dev *pdev; struct bnxt_qplib_chip_ctx *cctx; + struct bnxt_qplib_dev_attr *dattr; struct net_device *netdev; - struct bnxt_qplib_rcfw *rcfw; struct bnxt_qplib_pd_tbl pd_tbl; struct bnxt_qplib_sgid_tbl sgid_tbl; struct bnxt_qplib_pkey_tbl pkey_tbl; struct bnxt_qplib_dpi_tbl dpi_tbl; bool prio; + bool is_vf; }; static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx) @@ -450,4 +451,10 @@ static inline void bnxt_qplib_ring_nq_db(struct bnxt_qplib_db_info *info, else bnxt_qplib_ring_db32(info, arm); } + +static inline bool _is_ext_stats_supported(u16 dev_cap_flags) +{ + return dev_cap_flags & + CREQ_QUERY_FUNC_RESP_SB_EXT_STATS; +} #endif /* __BNXT_QPLIB_RES_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 3d9259632eb3..cbe83e9bce5c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -161,6 +161,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->l2_db_size = (sb->l2_db_space_size + 1) * (0x01 << RCFW_DBR_BASE_PAGE_SHIFT); attr->max_sgid = BNXT_QPLIB_NUM_GIDS_SUPPORTED; + attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags); bnxt_qplib_query_version(rcfw, attr->fw_ver); @@ -869,3 +870,53 @@ bail: bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); return rc; } + +int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid, + struct bnxt_qplib_ext_stat *estat) +{ + struct creq_query_roce_stats_ext_resp resp = {}; + struct creq_query_roce_stats_ext_resp_sb *sb; + struct cmdq_query_roce_stats_ext req = {}; + struct bnxt_qplib_rcfw_sbuf *sbuf; + u16 cmd_flags = 0; + int rc; + + sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); + if (!sbuf) { + dev_err(&rcfw->pdev->dev, + "SP: QUERY_ROCE_STATS_EXT alloc sb failed"); + return -ENOMEM; + } + + RCFW_CMD_PREP(req, QUERY_ROCE_STATS_EXT, cmd_flags); + + req.resp_size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS); + req.resp_addr = cpu_to_le64(sbuf->dma_addr); + req.function_id = cpu_to_le32(fid); + req.flags = cpu_to_le16(CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_FUNCTION_ID); + + rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, + (void *)&resp, (void *)sbuf, 0); + if (rc) + goto bail; + + sb = sbuf->sb; + estat->tx_atomic_req = le64_to_cpu(sb->tx_atomic_req_pkts); + estat->tx_read_req = le64_to_cpu(sb->tx_read_req_pkts); + estat->tx_read_res = le64_to_cpu(sb->tx_read_res_pkts); + estat->tx_write_req = le64_to_cpu(sb->tx_write_req_pkts); + estat->tx_send_req = le64_to_cpu(sb->tx_send_req_pkts); + estat->rx_atomic_req = le64_to_cpu(sb->rx_atomic_req_pkts); + estat->rx_read_req = le64_to_cpu(sb->rx_read_req_pkts); + estat->rx_read_res = le64_to_cpu(sb->rx_read_res_pkts); + estat->rx_write_req = le64_to_cpu(sb->rx_write_req_pkts); + estat->rx_send_req = le64_to_cpu(sb->rx_send_req_pkts); + estat->rx_roce_good_pkts = le64_to_cpu(sb->rx_roce_good_pkts); + estat->rx_roce_good_bytes = le64_to_cpu(sb->rx_roce_good_bytes); + estat->rx_out_of_buffer = le64_to_cpu(sb->rx_out_of_buffer_pkts); + estat->rx_out_of_sequence = le64_to_cpu(sb->rx_out_of_sequence_pkts); + +bail: + bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); + return rc; +} diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 260104783691..3d5c41841668 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -71,6 +71,7 @@ struct bnxt_qplib_dev_attr { u32 l2_db_size; u8 tqm_alloc_reqs[MAX_TQM_ALLOC_REQ]; bool is_atomic; + u16 dev_cap_flags; }; struct bnxt_qplib_pd { @@ -219,6 +220,30 @@ struct bnxt_qplib_roce_stats { /* port 3 active qps */ }; +struct bnxt_qplib_ext_stat { + u64 tx_atomic_req; + u64 tx_read_req; + u64 tx_read_res; + u64 tx_write_req; + u64 tx_send_req; + u64 tx_roce_pkts; + u64 tx_roce_bytes; + u64 rx_atomic_req; + u64 rx_read_req; + u64 rx_read_res; + u64 rx_write_req; + u64 rx_send_req; + u64 rx_roce_pkts; + u64 rx_roce_bytes; + u64 rx_roce_good_pkts; + u64 rx_roce_good_bytes; + u64 rx_out_of_buffer; + u64 rx_out_of_sequence; + u64 tx_cnp; + u64 rx_cnp; + u64 rx_ecn_marked; +}; + int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res, struct bnxt_qplib_sgid_tbl *sgid_tbl, int index, struct bnxt_qplib_gid *gid); @@ -263,4 +288,7 @@ int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res, int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids); int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_roce_stats *stats); +int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid, + struct bnxt_qplib_ext_stat *estat); + #endif /* __BNXT_QPLIB_SP_H__*/ diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index 3e40e0d76efd..ecb719098b75 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -1102,6 +1102,7 @@ struct cmdq_base { #define CMDQ_BASE_OPCODE_MODIFY_CC 0x8cUL #define CMDQ_BASE_OPCODE_QUERY_CC 0x8dUL #define CMDQ_BASE_OPCODE_QUERY_ROCE_STATS 0x8eUL + #define CMDQ_BASE_OPCODE_QUERY_ROCE_STATS_EXT 0x92UL u8 cmd_size; __le16 flags; __le16 cookie; @@ -1127,6 +1128,10 @@ struct cmdq_create_qp { #define CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL #define CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED 0x8UL #define CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED 0x10UL + #define CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED 0x80UL + #define CMDQ_CREATE_QP_QP_FLAGS_LAST \ + CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED + u8 type; #define CMDQ_CREATE_QP_TYPE_RC 0x2UL #define CMDQ_CREATE_QP_TYPE_UD 0x4UL @@ -2848,6 +2853,7 @@ struct creq_query_func_resp_sb { __le16 max_qp_wr; __le16 dev_cap_flags; #define CREQ_QUERY_FUNC_RESP_SB_DEV_CAP_FLAGS_RESIZE_QP 0x1UL + #define CREQ_QUERY_FUNC_RESP_SB_EXT_STATS 0x10UL __le32 max_cq; __le32 max_cqe; __le32 max_pd; @@ -3087,6 +3093,85 @@ struct creq_query_roce_stats_resp_sb { __le64 active_qp_count_p3; }; +/* cmdq_query_roce_stats_ext (size:192b/24B) */ +struct cmdq_query_roce_stats_ext { + u8 opcode; + #define CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_QUERY_ROCE_STATS 0x92UL + #define CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_LAST \ + CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_QUERY_ROCE_STATS + u8 cmd_size; + __le16 flags; + #define CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_COLLECTION_ID 0x1UL + #define CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_FUNCTION_ID 0x2UL + __le16 cookie; + u8 resp_size; + u8 collection_id; + __le64 resp_addr; + __le32 function_id; + #define CMDQ_QUERY_ROCE_STATS_EXT_PF_NUM_MASK 0xffUL + #define CMDQ_QUERY_ROCE_STATS_EXT_PF_NUM_SFT 0 + #define CMDQ_QUERY_ROCE_STATS_EXT_VF_NUM_MASK 0xffff00UL + #define CMDQ_QUERY_ROCE_STATS_EXT_VF_NUM_SFT 8 + #define CMDQ_QUERY_ROCE_STATS_EXT_VF_VALID 0x1000000UL + __le32 reserved32; +}; + +/* creq_query_roce_stats_ext_resp (size:128b/16B) */ +struct creq_query_roce_stats_ext_resp { + u8 type; + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_MASK 0x3fUL + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_SFT 0 + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_QP_EVENT 0x38UL + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_LAST \ + CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_QP_EVENT + u8 status; + __le16 cookie; + __le32 size; + u8 v; + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_V 0x1UL + u8 event; + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_QUERY_ROCE_STATS_EXT 0x92UL + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_LAST \ + CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_QUERY_ROCE_STATS_EXT + u8 reserved48[6]; +}; + +/* creq_query_roce_stats_ext_resp_sb (size:1536b/192B) */ +struct creq_query_roce_stats_ext_resp_sb { + u8 opcode; + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_QUERY_ROCE_STATS_EXT 0x92UL + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_LAST \ + CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_QUERY_ROCE_STATS_EXT + u8 status; + __le16 cookie; + __le16 flags; + u8 resp_size; + u8 rsvd; + __le64 tx_atomic_req_pkts; + __le64 tx_read_req_pkts; + __le64 tx_read_res_pkts; + __le64 tx_write_req_pkts; + __le64 tx_send_req_pkts; + __le64 tx_roce_pkts; + __le64 tx_roce_bytes; + __le64 rx_atomic_req_pkts; + __le64 rx_read_req_pkts; + __le64 rx_read_res_pkts; + __le64 rx_write_req_pkts; + __le64 rx_send_req_pkts; + __le64 rx_roce_pkts; + __le64 rx_roce_bytes; + __le64 rx_roce_good_pkts; + __le64 rx_roce_good_bytes; + __le64 rx_out_of_buffer_pkts; + __le64 rx_out_of_sequence_pkts; + __le64 tx_cnp_pkts; + __le64 rx_cnp_pkts; + __le64 rx_ecn_marked_pkts; + __le64 tx_cnp_bytes; + __le64 rx_cnp_bytes; +}; + /* QP error notification event (16 bytes) */ struct creq_qp_error_notification { u8 type; -- cgit From 0cc4a9bdfc298c38080565df2f6e903b7b3c0c59 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 15 Sep 2021 05:32:33 -0700 Subject: RDMA/bnxt_re: Update statistics counter name Update a statistics counter name as the interface structure got updated. Fixes: 9d6b648c3112 ("bnxt_en: Update firmware interface spec to 1.10.1.65.") Link: https://lore.kernel.org/r/1631709163-2287-3-git-send-email-selvin.xavier@broadcom.com Reviewed-by: Leon Romanovsky Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/hw_counters.c | 4 ++-- drivers/infiniband/hw/bnxt_re/hw_counters.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 7e6dcf3dd4bb..1c06c9cf234b 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -70,7 +70,7 @@ static const char * const bnxt_re_stat_name[] = { [BNXT_RE_TX_PKTS] = "tx_pkts", [BNXT_RE_TX_BYTES] = "tx_bytes", [BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors", - [BNXT_RE_RX_DROPS] = "rx_roce_drops", + [BNXT_RE_RX_ERRORS] = "rx_roce_errors", [BNXT_RE_RX_DISCARDS] = "rx_roce_discards", [BNXT_RE_TO_RETRANSMITS] = "to_retransmits", [BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd", @@ -273,7 +273,7 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, if (hw_stats) { stats->value[BNXT_RE_RECOVERABLE_ERRORS] = le64_to_cpu(hw_stats->tx_bcast_pkts); - stats->value[BNXT_RE_RX_DROPS] = + stats->value[BNXT_RE_RX_ERRORS] = le64_to_cpu(hw_stats->rx_error_pkts); stats->value[BNXT_RE_RX_DISCARDS] = le64_to_cpu(hw_stats->rx_discard_pkts); diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h index d65be4c22730..7943b2c393e4 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.h +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h @@ -53,7 +53,7 @@ enum bnxt_re_hw_stats { BNXT_RE_TX_PKTS, BNXT_RE_TX_BYTES, BNXT_RE_RECOVERABLE_ERRORS, - BNXT_RE_RX_DROPS, + BNXT_RE_RX_ERRORS, BNXT_RE_RX_DISCARDS, BNXT_RE_TO_RETRANSMITS, BNXT_RE_SEQ_ERR_NAKS_RCVD, -- cgit From 403bc4359a0098712b917bc4ae18349d5ae11ca3 Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Wed, 15 Sep 2021 05:32:34 -0700 Subject: RDMA/bnxt_re: Use separate response buffer for stat_ctx_free Use separate buffers for the request and response data. Eventhough the response data is not used, providing the correct length is appropriate. Link: https://lore.kernel.org/r/1631709163-2287-4-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Edwin Peer Reviewed-by: Leon Romanovsky Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 9abea36ae1a8..2c2eeb05268f 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -525,7 +525,8 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev, u32 fw_stats_ctx_id) { struct bnxt_en_dev *en_dev = rdev->en_dev; - struct hwrm_stat_ctx_free_input req = {0}; + struct hwrm_stat_ctx_free_input req = {}; + struct hwrm_stat_ctx_free_output resp = {}; struct bnxt_fw_msg fw_msg; int rc = -EINVAL; @@ -539,8 +540,8 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev, bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, -1); req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id); - bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&req, - sizeof(req), DFLT_HWRM_CMD_TIMEOUT); + bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, + sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); if (rc) ibdev_err(&rdev->ibdev, "Failed to free HW stats context %#x", -- cgit From b9b43ad3ce883f6d9f0fc3c24e2a0d2d94d7eb49 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 15 Sep 2021 05:32:35 -0700 Subject: RDMA/bnxt_re: Reduce the delay in polling for hwrm command completion Driver has 1ms delay between the polling for atomic command completion. Polling immediately after issuing command usually doesn't report any completions. So all commands in the blocking path needs two iterations. So effectively 1ms spend on each command. HW requires much lesser time for each command. So reduce the delay to 1us and increase the iteration count to wait for the same time. Link: https://lore.kernel.org/r/1631709163-2287-5-git-send-email-selvin.xavier@broadcom.com Reviewed-by: Leon Romanovsky Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 2 +- drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 5d384def5e5f..947e8c55c303 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -78,7 +78,7 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) if (!test_bit(cbit, cmdq->cmdq_bitmap)) goto done; do { - mdelay(1); /* 1m sec */ + udelay(1); bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet); } while (test_bit(cbit, cmdq->cmdq_bitmap) && --count); done: diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 9474c0046582..82faa4e4cda8 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -96,7 +96,7 @@ static inline void bnxt_qplib_set_cmd_slots(struct cmdq_base *req) #define RCFW_MAX_COOKIE_VALUE 0x7FFF #define RCFW_CMD_IS_BLOCKING 0x8000 -#define RCFW_BLOCKED_CMD_WAIT_COUNT 0x4E20 +#define RCFW_BLOCKED_CMD_WAIT_COUNT 20000000UL /* 20 sec */ #define HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK 0x1000900020011ULL -- cgit From 6a7296c918eb5606b58632ff03d8515f61dc1d36 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 15 Sep 2021 05:32:36 -0700 Subject: RDMA/bnxt_re: Support multiple page sizes HW can support multiple page sizes. Enable bits for enabling sizes from 4k to 1G by reporting page_size_cap. Link: https://lore.kernel.org/r/1631709163-2287-6-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Selvin Xavier Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 14 ++------------ drivers/infiniband/hw/bnxt_re/ib_verbs.c | 4 ++-- 2 files changed, 4 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 5b8562000778..79401e6c6aa9 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -43,19 +43,9 @@ #define ROCE_DRV_MODULE_NAME "bnxt_re" #define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver" -#define BNXT_RE_PAGE_SHIFT_4K (12) -#define BNXT_RE_PAGE_SHIFT_8K (13) -#define BNXT_RE_PAGE_SHIFT_64K (16) -#define BNXT_RE_PAGE_SHIFT_2M (21) -#define BNXT_RE_PAGE_SHIFT_8M (23) -#define BNXT_RE_PAGE_SHIFT_1G (30) -#define BNXT_RE_PAGE_SIZE_4K BIT(BNXT_RE_PAGE_SHIFT_4K) -#define BNXT_RE_PAGE_SIZE_8K BIT(BNXT_RE_PAGE_SHIFT_8K) -#define BNXT_RE_PAGE_SIZE_64K BIT(BNXT_RE_PAGE_SHIFT_64K) -#define BNXT_RE_PAGE_SIZE_2M BIT(BNXT_RE_PAGE_SHIFT_2M) -#define BNXT_RE_PAGE_SIZE_8M BIT(BNXT_RE_PAGE_SHIFT_8M) -#define BNXT_RE_PAGE_SIZE_1G BIT(BNXT_RE_PAGE_SHIFT_1G) +#define BNXT_RE_PAGE_SHIFT_1G (30) +#define BNXT_RE_PAGE_SIZE_SUPPORTED 0x7FFFF000 /* 4kb - 1G */ #define BNXT_RE_MAX_MR_SIZE_LOW BIT_ULL(BNXT_RE_PAGE_SHIFT_1G) #define BNXT_RE_MAX_MR_SIZE_HIGH BIT_ULL(39) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index d09841b8d9e1..1654260611a7 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -133,7 +133,7 @@ int bnxt_re_query_device(struct ib_device *ibdev, bnxt_qplib_get_guid(rdev->netdev->dev_addr, (u8 *)&ib_attr->sys_image_guid); ib_attr->max_mr_size = BNXT_RE_MAX_MR_SIZE; - ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M; + ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_SUPPORTED; ib_attr->vendor_id = rdev->en_dev->pdev->vendor; ib_attr->vendor_part_id = rdev->en_dev->pdev->device; @@ -3808,7 +3808,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, mr->qplib_mr.va = virt_addr; page_size = ib_umem_find_best_pgsz( - umem, BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M, virt_addr); + umem, BNXT_RE_PAGE_SIZE_SUPPORTED, virt_addr); if (!page_size) { ibdev_err(&rdev->ibdev, "umem page size unsupported!"); rc = -EFAULT; -- cgit From d195ff03bf6dffb8d4bac77b328aa2602e843b9e Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 15 Sep 2021 05:32:37 -0700 Subject: RDMA/bnxt_re: Suppress unwanted error messages Terminal CQEs are expected during QP destroy. Avoid the unwanted error messages. Link: https://lore.kernel.org/r/1631709163-2287-7-git-send-email-selvin.xavier@broadcom.com Reviewed-by: Leon Romanovsky Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index d0895e61df15..539b1a2f9008 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -2854,6 +2854,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, struct cq_base *hw_cqe; u32 sw_cons, raw_cons; int budget, rc = 0; + u8 type; raw_cons = cq->hwq.cons; budget = num_cqes; @@ -2872,7 +2873,8 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, */ dma_rmb(); /* From the device's respective CQE format to qplib_wc*/ - switch (hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK) { + type = hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK; + switch (type) { case CQ_BASE_CQE_TYPE_REQ: rc = bnxt_qplib_cq_process_req(cq, (struct cq_req *)hw_cqe, @@ -2919,8 +2921,9 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, /* Error while processing the CQE, just skip to the * next one */ - dev_err(&cq->hwq.pdev->dev, - "process_cqe error rc = 0x%x\n", rc); + if (type != CQ_BASE_CQE_TYPE_TERMINAL) + dev_err(&cq->hwq.pdev->dev, + "process_cqe error rc = 0x%x\n", rc); } raw_cons++; } -- cgit From 598d16fa1bf93431ad35bbab3ed1affe4fb7b562 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 15 Sep 2021 05:32:38 -0700 Subject: RDMA/bnxt_re: Fix query SRQ failure Fill the missing parameters for the FW command while querying SRQ. Fixes: 37cb11acf1f7 ("RDMA/bnxt_re: Add SRQ support for Broadcom adapters") Link: https://lore.kernel.org/r/1631709163-2287-8-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Selvin Xavier Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 539b1a2f9008..ca88849559bf 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -707,12 +707,13 @@ int bnxt_qplib_query_srq(struct bnxt_qplib_res *res, int rc = 0; RCFW_CMD_PREP(req, QUERY_SRQ, cmd_flags); - req.srq_cid = cpu_to_le32(srq->id); /* Configure the request */ sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); if (!sbuf) return -ENOMEM; + req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS; + req.srq_cid = cpu_to_le32(srq->id); sb = sbuf->sb; rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, (void *)sbuf, 0); -- cgit From 2b4ccce6cafae8eff0daec06b7652d9ab75f692f Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 15 Sep 2021 05:32:39 -0700 Subject: RDMA/bnxt_re: Fix FRMR issue with single page MR allocation When the FRMR is allocated with single page, driver is attempting to create a level 0 HWQ and not allocating any page because the nopte field is set. This causes the crash during post_send as the pbl is not populated. To avoid this crash, check for the nopte bit during HWQ creation with single page and create a level 1 page table and populate the pbl address correctly. Link: https://lore.kernel.org/r/1631709163-2287-9-git-send-email-selvin.xavier@broadcom.com Reviewed-by: Leon Romanovsky Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_res.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 44282a8cdd4f..bf49363f590e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -228,15 +228,16 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, npages++; } - if (npages == MAX_PBL_LVL_0_PGS) { + if (npages == MAX_PBL_LVL_0_PGS && !hwq_attr->sginfo->nopte) { /* This request is Level 0, map PTE */ rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], hwq_attr->sginfo); if (rc) goto fail; hwq->level = PBL_LVL_0; + goto done; } - if (npages > MAX_PBL_LVL_0_PGS) { + if (npages >= MAX_PBL_LVL_0_PGS) { if (npages > MAX_PBL_LVL_1_PGS) { u32 flag = (hwq_attr->type == HWQ_TYPE_L2_CMPL) ? 0 : PTU_PTE_VALID; -- cgit From 690ea7fe00afe1270590a9e4211705691dcef1bd Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 15 Sep 2021 05:32:40 -0700 Subject: RDMA/bnxt_re: Use GFP_KERNEL in non atomic context Use GFP_KERNEL instead of GFP_ATOMIC while allocating control path structures which will be only called from non atomic context Link: https://lore.kernel.org/r/1631709163-2287-10-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Selvin Xavier Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/main.c | 2 +- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 2c2eeb05268f..4fa3b14b2613 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1730,7 +1730,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier, } if (sch_work) { /* Allocate for the deferred task */ - re_work = kzalloc(sizeof(*re_work), GFP_ATOMIC); + re_work = kzalloc(sizeof(*re_work), GFP_KERNEL); if (re_work) { get_device(&rdev->ibdev.dev); re_work->rdev = rdev; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 947e8c55c303..3de854727460 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -848,13 +848,13 @@ struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf( { struct bnxt_qplib_rcfw_sbuf *sbuf; - sbuf = kzalloc(sizeof(*sbuf), GFP_ATOMIC); + sbuf = kzalloc(sizeof(*sbuf), GFP_KERNEL); if (!sbuf) return NULL; sbuf->size = size; sbuf->sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf->size, - &sbuf->dma_addr, GFP_ATOMIC); + &sbuf->dma_addr, GFP_KERNEL); if (!sbuf->sb) goto bail; -- cgit From 7a3c3a121eb73f59f04939a14c117e884f3538e2 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 15 Sep 2021 05:32:41 -0700 Subject: RDMA/bnxt_re: Correct FRMR size calculation FRMR WQE requires to provide the log2 value of the PBL and page size. Use the standard ilog2() to calculate the log2 value Link: https://lore.kernel.org/r/1631709163-2287-11-git-send-email-selvin.xavier@broadcom.com Reviewed-by: Leon Romanovsky Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 1654260611a7..938088f86d3d 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2488,7 +2488,8 @@ static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr, wqe->frmr.l_key = wr->key; wqe->frmr.length = wr->mr->length; - wqe->frmr.pbl_pg_sz_log = (wr->mr->page_size >> PAGE_SHIFT_4K) - 1; + wqe->frmr.pbl_pg_sz_log = ilog2(PAGE_SIZE >> PAGE_SHIFT_4K); + wqe->frmr.pg_sz_log = ilog2(wr->mr->page_size >> PAGE_SHIFT_4K); wqe->frmr.va = wr->mr->iova; return 0; } -- cgit From 6bda39149d4b8920fdb8744090653aca3daa792d Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 15 Sep 2021 05:32:42 -0700 Subject: RDMA/bnxt_re: Check if the vlan is valid before reporting When VF is configured with default vlan, HW strips the vlan from the packet and driver receives it in Rx completion. VLAN needs to be reported for UD work completion only if the vlan is configured on the host. Add a check for valid vlan in the UD receive path. Link: https://lore.kernel.org/r/1631709163-2287-12-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 938088f86d3d..67ca794f64e8 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -3365,8 +3365,11 @@ static void bnxt_re_process_res_ud_wc(struct bnxt_re_qp *qp, struct ib_wc *wc, struct bnxt_qplib_cqe *cqe) { + struct bnxt_re_dev *rdev; + u16 vlan_id = 0; u8 nw_type; + rdev = qp->rdev; wc->opcode = IB_WC_RECV; wc->status = __rc_to_ib_wc_status(cqe->status); @@ -3378,9 +3381,12 @@ static void bnxt_re_process_res_ud_wc(struct bnxt_re_qp *qp, memcpy(wc->smac, cqe->smac, ETH_ALEN); wc->wc_flags |= IB_WC_WITH_SMAC; if (cqe->flags & CQ_RES_UD_FLAGS_META_FORMAT_VLAN) { - wc->vlan_id = (cqe->cfa_meta & 0xFFF); - if (wc->vlan_id < 0x1000) - wc->wc_flags |= IB_WC_WITH_VLAN; + vlan_id = (cqe->cfa_meta & 0xFFF); + } + /* Mark only if vlan_id is non zero */ + if (vlan_id && bnxt_re_check_if_vlan_valid(rdev, vlan_id)) { + wc->vlan_id = vlan_id; + wc->wc_flags |= IB_WC_WITH_VLAN; } nw_type = (cqe->flags & CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK) >> CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT; -- cgit From ae6e843fe08d0ea8e158815809dcc20e3a1afc22 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Tue, 14 Sep 2021 11:42:03 -0500 Subject: RDMA/rxe: Add memory barriers to kernel queues Earlier patches added memory barriers to protect user space to kernel space communications. The user space queues were previously shown to have occasional memory synchonization errors which were removed by adding smp_load_acquire, smp_store_release barriers. This patch extends that to the case where queues are used between kernel space threads. This patch also extends the queue types to include kernel ULP queues which access the other end of the queues in kernel verbs calls like poll_cq and post_send/recv. Link: https://lore.kernel.org/r/20210914164206.19768-2-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_comp.c | 12 +- drivers/infiniband/sw/rxe/rxe_cq.c | 25 +-- drivers/infiniband/sw/rxe/rxe_qp.c | 12 +- drivers/infiniband/sw/rxe/rxe_queue.c | 30 +++- drivers/infiniband/sw/rxe/rxe_queue.h | 292 ++++++++++++++-------------------- drivers/infiniband/sw/rxe/rxe_req.c | 37 ++--- drivers/infiniband/sw/rxe/rxe_resp.c | 40 ++--- drivers/infiniband/sw/rxe/rxe_srq.c | 2 +- drivers/infiniband/sw/rxe/rxe_verbs.c | 53 ++---- 9 files changed, 189 insertions(+), 314 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index d2d802c776fd..48a3864ada29 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -142,10 +142,7 @@ static inline enum comp_state get_wqe(struct rxe_qp *qp, /* we come here whether or not we found a response packet to see if * there are any posted WQEs */ - if (qp->is_user) - wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_USER); - else - wqe = queue_head(qp->sq.queue, QUEUE_TYPE_KERNEL); + wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT); *wqe_p = wqe; /* no WQE or requester has not started it yet */ @@ -432,10 +429,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) if (post) make_send_cqe(qp, wqe, &cqe); - if (qp->is_user) - advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_USER); - else - advance_consumer(qp->sq.queue, QUEUE_TYPE_KERNEL); + queue_advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT); if (post) rxe_cq_post(qp->scq, &cqe, 0); @@ -539,7 +533,7 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify) wqe->status = IB_WC_WR_FLUSH_ERR; do_complete(qp, wqe); } else { - advance_consumer(q, q->type); + queue_advance_consumer(q, q->type); } } } diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index aef288f164fd..4eedaa0244b3 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -25,11 +25,7 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, } if (cq) { - if (cq->is_user) - count = queue_count(cq->queue, QUEUE_TYPE_TO_USER); - else - count = queue_count(cq->queue, QUEUE_TYPE_KERNEL); - + count = queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT); if (cqe < count) { pr_warn("cqe(%d) < current # elements in queue (%d)", cqe, count); @@ -65,7 +61,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, int err; enum queue_type type; - type = uresp ? QUEUE_TYPE_TO_USER : QUEUE_TYPE_KERNEL; + type = QUEUE_TYPE_TO_CLIENT; cq->queue = rxe_queue_init(rxe, &cqe, sizeof(struct rxe_cqe), type); if (!cq->queue) { @@ -117,11 +113,7 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) spin_lock_irqsave(&cq->cq_lock, flags); - if (cq->is_user) - full = queue_full(cq->queue, QUEUE_TYPE_TO_USER); - else - full = queue_full(cq->queue, QUEUE_TYPE_KERNEL); - + full = queue_full(cq->queue, QUEUE_TYPE_TO_CLIENT); if (unlikely(full)) { spin_unlock_irqrestore(&cq->cq_lock, flags); if (cq->ibcq.event_handler) { @@ -134,17 +126,10 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) return -EBUSY; } - if (cq->is_user) - addr = producer_addr(cq->queue, QUEUE_TYPE_TO_USER); - else - addr = producer_addr(cq->queue, QUEUE_TYPE_KERNEL); - + addr = queue_producer_addr(cq->queue, QUEUE_TYPE_TO_CLIENT); memcpy(addr, cqe, sizeof(*cqe)); - if (cq->is_user) - advance_producer(cq->queue, QUEUE_TYPE_TO_USER); - else - advance_producer(cq->queue, QUEUE_TYPE_KERNEL); + queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT); spin_unlock_irqrestore(&cq->cq_lock, flags); diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index fa97ce9eaea3..c8f4790083d2 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -229,7 +229,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, qp->sq.max_inline = init->cap.max_inline_data = wqe_size; wqe_size += sizeof(struct rxe_send_wqe); - type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL; + type = QUEUE_TYPE_FROM_CLIENT; qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr, wqe_size, type); if (!qp->sq.queue) @@ -246,12 +246,8 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, return err; } - if (qp->is_user) - qp->req.wqe_index = producer_index(qp->sq.queue, - QUEUE_TYPE_FROM_USER); - else - qp->req.wqe_index = producer_index(qp->sq.queue, - QUEUE_TYPE_KERNEL); + qp->req.wqe_index = queue_get_producer(qp->sq.queue, + QUEUE_TYPE_FROM_CLIENT); qp->req.state = QP_STATE_RESET; qp->req.opcode = -1; @@ -291,7 +287,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n", qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size); - type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL; + type = QUEUE_TYPE_FROM_CLIENT; qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, wqe_size, type); if (!qp->rq.queue) diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c index 72d95398e604..6e6e023c1b45 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.c +++ b/drivers/infiniband/sw/rxe/rxe_queue.c @@ -111,17 +111,33 @@ err1: static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q, unsigned int num_elem) { - if (!queue_empty(q, q->type) && (num_elem < queue_count(q, q->type))) + enum queue_type type = q->type; + u32 prod; + u32 cons; + + if (!queue_empty(q, q->type) && (num_elem < queue_count(q, type))) return -EINVAL; - while (!queue_empty(q, q->type)) { - memcpy(producer_addr(new_q, new_q->type), - consumer_addr(q, q->type), - new_q->elem_size); - advance_producer(new_q, new_q->type); - advance_consumer(q, q->type); + prod = queue_get_producer(new_q, type); + cons = queue_get_consumer(q, type); + + while (!queue_empty(q, type)) { + memcpy(queue_addr_from_index(new_q, prod), + queue_addr_from_index(q, cons), new_q->elem_size); + prod = queue_next_index(new_q, prod); + cons = queue_next_index(q, cons); } + new_q->buf->producer_index = prod; + q->buf->consumer_index = cons; + + /* update private index copies */ + if (type == QUEUE_TYPE_TO_CLIENT) + new_q->index = new_q->buf->producer_index; + else + q->index = q->buf->consumer_index; + + /* exchange rxe_queue headers */ swap(*q, *new_q); return 0; diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h index 2702b0e55fc3..6227112ef7a2 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.h +++ b/drivers/infiniband/sw/rxe/rxe_queue.h @@ -10,34 +10,47 @@ /* for definition of shared struct rxe_queue_buf */ #include -/* implements a simple circular buffer that can optionally be - * shared between user space and the kernel and can be resized - * the requested element size is rounded up to a power of 2 - * and the number of elements in the buffer is also rounded - * up to a power of 2. Since the queue is empty when the - * producer and consumer indices match the maximum capacity - * of the queue is one less than the number of element slots +/* Implements a simple circular buffer that is shared between user + * and the driver and can be resized. The requested element size is + * rounded up to a power of 2 and the number of elements in the buffer + * is also rounded up to a power of 2. Since the queue is empty when + * the producer and consumer indices match the maximum capacity of the + * queue is one less than the number of element slots. * * Notes: - * - Kernel space indices are always masked off to q->index_mask - * before storing so do not need to be checked on reads. - * - User space indices may be out of range and must be - * masked before use when read. - * - The kernel indices for shared queues must not be written - * by user space so a local copy is used and a shared copy is - * stored when the local copy changes. + * - The driver indices are always masked off to q->index_mask + * before storing so do not need to be checked on reads. + * - The user whether user space or kernel is generally + * not trusted so its parameters are masked to make sure + * they do not access the queue out of bounds on reads. + * - The driver indices for queues must not be written + * by user so a local copy is used and a shared copy is + * stored when the local copy is changed. * - By passing the type in the parameter list separate from q - * the compiler can eliminate the switch statement when the - * actual queue type is known when the function is called. - * In the performance path this is done. In less critical - * paths just q->type is passed. + * the compiler can eliminate the switch statement when the + * actual queue type is known when the function is called at + * compile time. + * - These queues are lock free. The user and driver must protect + * changes to their end of the queues with locks if more than one + * CPU can be accessing it at the same time. */ -/* type of queue */ +/** + * enum queue_type - type of queue + * @QUEUE_TYPE_TO_CLIENT: Queue is written by rxe driver and + * read by client. Used by rxe driver only. + * @QUEUE_TYPE_FROM_CLIENT: Queue is written by client and + * read by rxe driver. Used by rxe driver only. + * @QUEUE_TYPE_TO_DRIVER: Queue is written by client and + * read by rxe driver. Used by kernel client only. + * @QUEUE_TYPE_FROM_DRIVER: Queue is written by rxe driver and + * read by client. Used by kernel client only. + */ enum queue_type { - QUEUE_TYPE_KERNEL, - QUEUE_TYPE_TO_USER, - QUEUE_TYPE_FROM_USER, + QUEUE_TYPE_TO_CLIENT, + QUEUE_TYPE_FROM_CLIENT, + QUEUE_TYPE_TO_DRIVER, + QUEUE_TYPE_FROM_DRIVER, }; struct rxe_queue { @@ -69,238 +82,171 @@ struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem, int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p, unsigned int elem_size, struct ib_udata *udata, struct mminfo __user *outbuf, - /* Protect producers while resizing queue */ - spinlock_t *producer_lock, - /* Protect consumers while resizing queue */ - spinlock_t *consumer_lock); + spinlock_t *producer_lock, spinlock_t *consumer_lock); void rxe_queue_cleanup(struct rxe_queue *queue); -static inline int next_index(struct rxe_queue *q, int index) +static inline u32 queue_next_index(struct rxe_queue *q, int index) { - return (index + 1) & q->buf->index_mask; + return (index + 1) & q->index_mask; } -static inline int queue_empty(struct rxe_queue *q, enum queue_type type) +static inline u32 queue_get_producer(const struct rxe_queue *q, + enum queue_type type) { u32 prod; - u32 cons; switch (type) { - case QUEUE_TYPE_FROM_USER: - /* protect user space index */ + case QUEUE_TYPE_FROM_CLIENT: + /* protect user index */ prod = smp_load_acquire(&q->buf->producer_index); - cons = q->index; break; - case QUEUE_TYPE_TO_USER: + case QUEUE_TYPE_TO_CLIENT: prod = q->index; - /* protect user space index */ - cons = smp_load_acquire(&q->buf->consumer_index); break; - case QUEUE_TYPE_KERNEL: + case QUEUE_TYPE_FROM_DRIVER: + /* protect driver index */ + prod = smp_load_acquire(&q->buf->producer_index); + break; + case QUEUE_TYPE_TO_DRIVER: prod = q->buf->producer_index; - cons = q->buf->consumer_index; break; } - return ((prod - cons) & q->index_mask) == 0; + return prod; } -static inline int queue_full(struct rxe_queue *q, enum queue_type type) +static inline u32 queue_get_consumer(const struct rxe_queue *q, + enum queue_type type) { - u32 prod; u32 cons; switch (type) { - case QUEUE_TYPE_FROM_USER: - /* protect user space index */ - prod = smp_load_acquire(&q->buf->producer_index); + case QUEUE_TYPE_FROM_CLIENT: cons = q->index; break; - case QUEUE_TYPE_TO_USER: - prod = q->index; - /* protect user space index */ + case QUEUE_TYPE_TO_CLIENT: + /* protect user index */ cons = smp_load_acquire(&q->buf->consumer_index); break; - case QUEUE_TYPE_KERNEL: - prod = q->buf->producer_index; + case QUEUE_TYPE_FROM_DRIVER: cons = q->buf->consumer_index; break; + case QUEUE_TYPE_TO_DRIVER: + /* protect driver index */ + cons = smp_load_acquire(&q->buf->consumer_index); + break; } - return ((prod + 1 - cons) & q->index_mask) == 0; + return cons; } -static inline unsigned int queue_count(const struct rxe_queue *q, - enum queue_type type) +static inline int queue_empty(struct rxe_queue *q, enum queue_type type) { - u32 prod; - u32 cons; - - switch (type) { - case QUEUE_TYPE_FROM_USER: - /* protect user space index */ - prod = smp_load_acquire(&q->buf->producer_index); - cons = q->index; - break; - case QUEUE_TYPE_TO_USER: - prod = q->index; - /* protect user space index */ - cons = smp_load_acquire(&q->buf->consumer_index); - break; - case QUEUE_TYPE_KERNEL: - prod = q->buf->producer_index; - cons = q->buf->consumer_index; - break; - } + u32 prod = queue_get_producer(q, type); + u32 cons = queue_get_consumer(q, type); - return (prod - cons) & q->index_mask; + return ((prod - cons) & q->index_mask) == 0; } -static inline void advance_producer(struct rxe_queue *q, enum queue_type type) +static inline int queue_full(struct rxe_queue *q, enum queue_type type) { - u32 prod; + u32 prod = queue_get_producer(q, type); + u32 cons = queue_get_consumer(q, type); - switch (type) { - case QUEUE_TYPE_FROM_USER: - pr_warn_once("Normally kernel should not write user space index\n"); - /* protect user space index */ - prod = smp_load_acquire(&q->buf->producer_index); - prod = (prod + 1) & q->index_mask; - /* same */ - smp_store_release(&q->buf->producer_index, prod); - break; - case QUEUE_TYPE_TO_USER: - prod = q->index; - q->index = (prod + 1) & q->index_mask; - q->buf->producer_index = q->index; - break; - case QUEUE_TYPE_KERNEL: - prod = q->buf->producer_index; - q->buf->producer_index = (prod + 1) & q->index_mask; - break; - } + return ((prod + 1 - cons) & q->index_mask) == 0; } -static inline void advance_consumer(struct rxe_queue *q, enum queue_type type) +static inline u32 queue_count(const struct rxe_queue *q, + enum queue_type type) { - u32 cons; + u32 prod = queue_get_producer(q, type); + u32 cons = queue_get_consumer(q, type); - switch (type) { - case QUEUE_TYPE_FROM_USER: - cons = q->index; - q->index = (cons + 1) & q->index_mask; - q->buf->consumer_index = q->index; - break; - case QUEUE_TYPE_TO_USER: - pr_warn_once("Normally kernel should not write user space index\n"); - /* protect user space index */ - cons = smp_load_acquire(&q->buf->consumer_index); - cons = (cons + 1) & q->index_mask; - /* same */ - smp_store_release(&q->buf->consumer_index, cons); - break; - case QUEUE_TYPE_KERNEL: - cons = q->buf->consumer_index; - q->buf->consumer_index = (cons + 1) & q->index_mask; - break; - } + return (prod - cons) & q->index_mask; } -static inline void *producer_addr(struct rxe_queue *q, enum queue_type type) +static inline void queue_advance_producer(struct rxe_queue *q, + enum queue_type type) { u32 prod; switch (type) { - case QUEUE_TYPE_FROM_USER: - /* protect user space index */ - prod = smp_load_acquire(&q->buf->producer_index); - prod &= q->index_mask; + case QUEUE_TYPE_FROM_CLIENT: + pr_warn("%s: attempt to advance client index\n", + __func__); break; - case QUEUE_TYPE_TO_USER: + case QUEUE_TYPE_TO_CLIENT: prod = q->index; + prod = (prod + 1) & q->index_mask; + q->index = prod; + /* protect user index */ + smp_store_release(&q->buf->producer_index, prod); + break; + case QUEUE_TYPE_FROM_DRIVER: + pr_warn("%s: attempt to advance driver index\n", + __func__); break; - case QUEUE_TYPE_KERNEL: + case QUEUE_TYPE_TO_DRIVER: prod = q->buf->producer_index; + prod = (prod + 1) & q->index_mask; + q->buf->producer_index = prod; break; } - - return q->buf->data + (prod << q->log2_elem_size); } -static inline void *consumer_addr(struct rxe_queue *q, enum queue_type type) +static inline void queue_advance_consumer(struct rxe_queue *q, + enum queue_type type) { u32 cons; switch (type) { - case QUEUE_TYPE_FROM_USER: + case QUEUE_TYPE_FROM_CLIENT: cons = q->index; + cons = (cons + 1) & q->index_mask; + q->index = cons; + /* protect user index */ + smp_store_release(&q->buf->consumer_index, cons); break; - case QUEUE_TYPE_TO_USER: - /* protect user space index */ - cons = smp_load_acquire(&q->buf->consumer_index); - cons &= q->index_mask; + case QUEUE_TYPE_TO_CLIENT: + pr_warn("%s: attempt to advance client index\n", + __func__); break; - case QUEUE_TYPE_KERNEL: + case QUEUE_TYPE_FROM_DRIVER: cons = q->buf->consumer_index; + cons = (cons + 1) & q->index_mask; + q->buf->consumer_index = cons; + break; + case QUEUE_TYPE_TO_DRIVER: + pr_warn("%s: attempt to advance driver index\n", + __func__); break; } - - return q->buf->data + (cons << q->log2_elem_size); } -static inline unsigned int producer_index(struct rxe_queue *q, - enum queue_type type) +static inline void *queue_producer_addr(struct rxe_queue *q, + enum queue_type type) { - u32 prod; + u32 prod = queue_get_producer(q, type); - switch (type) { - case QUEUE_TYPE_FROM_USER: - /* protect user space index */ - prod = smp_load_acquire(&q->buf->producer_index); - prod &= q->index_mask; - break; - case QUEUE_TYPE_TO_USER: - prod = q->index; - break; - case QUEUE_TYPE_KERNEL: - prod = q->buf->producer_index; - break; - } - - return prod; + return q->buf->data + (prod << q->log2_elem_size); } -static inline unsigned int consumer_index(struct rxe_queue *q, - enum queue_type type) +static inline void *queue_consumer_addr(struct rxe_queue *q, + enum queue_type type) { - u32 cons; - - switch (type) { - case QUEUE_TYPE_FROM_USER: - cons = q->index; - break; - case QUEUE_TYPE_TO_USER: - /* protect user space index */ - cons = smp_load_acquire(&q->buf->consumer_index); - cons &= q->index_mask; - break; - case QUEUE_TYPE_KERNEL: - cons = q->buf->consumer_index; - break; - } + u32 cons = queue_get_consumer(q, type); - return cons; + return q->buf->data + (cons << q->log2_elem_size); } -static inline void *addr_from_index(struct rxe_queue *q, - unsigned int index) +static inline void *queue_addr_from_index(struct rxe_queue *q, u32 index) { return q->buf->data + ((index & q->index_mask) - << q->buf->log2_elem_size); + << q->log2_elem_size); } -static inline unsigned int index_from_addr(const struct rxe_queue *q, +static inline u32 queue_index_from_addr(const struct rxe_queue *q, const void *addr) { return (((u8 *)addr - q->buf->data) >> q->log2_elem_size) @@ -309,7 +255,7 @@ static inline unsigned int index_from_addr(const struct rxe_queue *q, static inline void *queue_head(struct rxe_queue *q, enum queue_type type) { - return queue_empty(q, type) ? NULL : consumer_addr(q, type); + return queue_empty(q, type) ? NULL : queue_consumer_addr(q, type); } #endif /* RXE_QUEUE_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 3894197a82f6..801e36cefc29 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -49,21 +49,16 @@ static void req_retry(struct rxe_qp *qp) unsigned int cons; unsigned int prod; - if (qp->is_user) { - cons = consumer_index(q, QUEUE_TYPE_FROM_USER); - prod = producer_index(q, QUEUE_TYPE_FROM_USER); - } else { - cons = consumer_index(q, QUEUE_TYPE_KERNEL); - prod = producer_index(q, QUEUE_TYPE_KERNEL); - } + cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT); + prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT); qp->req.wqe_index = cons; qp->req.psn = qp->comp.psn; qp->req.opcode = -1; for (wqe_index = cons; wqe_index != prod; - wqe_index = next_index(q, wqe_index)) { - wqe = addr_from_index(qp->sq.queue, wqe_index); + wqe_index = queue_next_index(q, wqe_index)) { + wqe = queue_addr_from_index(qp->sq.queue, wqe_index); mask = wr_opcode_mask(wqe->wr.opcode, qp); if (wqe->state == wqe_state_posted) @@ -121,15 +116,9 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) unsigned int cons; unsigned int prod; - if (qp->is_user) { - wqe = queue_head(q, QUEUE_TYPE_FROM_USER); - cons = consumer_index(q, QUEUE_TYPE_FROM_USER); - prod = producer_index(q, QUEUE_TYPE_FROM_USER); - } else { - wqe = queue_head(q, QUEUE_TYPE_KERNEL); - cons = consumer_index(q, QUEUE_TYPE_KERNEL); - prod = producer_index(q, QUEUE_TYPE_KERNEL); - } + wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT); + cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT); + prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT); if (unlikely(qp->req.state == QP_STATE_DRAIN)) { /* check to see if we are drained; @@ -170,7 +159,7 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) if (index == prod) return NULL; - wqe = addr_from_index(q, index); + wqe = queue_addr_from_index(q, index); if (unlikely((qp->req.state == QP_STATE_DRAIN || qp->req.state == QP_STATE_DRAINED) && @@ -560,7 +549,8 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, qp->req.opcode = pkt->opcode; if (pkt->mask & RXE_END_MASK) - qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); + qp->req.wqe_index = queue_next_index(qp->sq.queue, + qp->req.wqe_index); qp->need_req_skb = 0; @@ -614,7 +604,7 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; - qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); + qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index); if ((wqe->wr.send_flags & IB_SEND_SIGNALED) || qp->sq_sig_type == IB_SIGNAL_ALL_WR) @@ -645,7 +635,8 @@ next_wqe: goto exit; if (unlikely(qp->req.state == QP_STATE_RESET)) { - qp->req.wqe_index = consumer_index(q, q->type); + qp->req.wqe_index = queue_get_consumer(q, + QUEUE_TYPE_FROM_CLIENT); qp->req.opcode = -1; qp->req.need_rd_atomic = 0; qp->req.wait_psn = 0; @@ -711,7 +702,7 @@ next_wqe: wqe->last_psn = qp->req.psn; qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; qp->req.opcode = IB_OPCODE_UD_SEND_ONLY; - qp->req.wqe_index = next_index(qp->sq.queue, + qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index); wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 5501227ddc65..c5a0b1f5747a 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -303,10 +303,7 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp) spin_lock_bh(&srq->rq.consumer_lock); - if (qp->is_user) - wqe = queue_head(q, QUEUE_TYPE_FROM_USER); - else - wqe = queue_head(q, QUEUE_TYPE_KERNEL); + wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT); if (!wqe) { spin_unlock_bh(&srq->rq.consumer_lock); return RESPST_ERR_RNR; @@ -322,13 +319,8 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp) memcpy(&qp->resp.srq_wqe, wqe, size); qp->resp.wqe = &qp->resp.srq_wqe.wqe; - if (qp->is_user) { - advance_consumer(q, QUEUE_TYPE_FROM_USER); - count = queue_count(q, QUEUE_TYPE_FROM_USER); - } else { - advance_consumer(q, QUEUE_TYPE_KERNEL); - count = queue_count(q, QUEUE_TYPE_KERNEL); - } + queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT); + count = queue_count(q, QUEUE_TYPE_FROM_CLIENT); if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) { srq->limit = 0; @@ -357,12 +349,8 @@ static enum resp_states check_resource(struct rxe_qp *qp, qp->resp.status = IB_WC_WR_FLUSH_ERR; return RESPST_COMPLETE; } else if (!srq) { - if (qp->is_user) - qp->resp.wqe = queue_head(qp->rq.queue, - QUEUE_TYPE_FROM_USER); - else - qp->resp.wqe = queue_head(qp->rq.queue, - QUEUE_TYPE_KERNEL); + qp->resp.wqe = queue_head(qp->rq.queue, + QUEUE_TYPE_FROM_CLIENT); if (qp->resp.wqe) { qp->resp.status = IB_WC_WR_FLUSH_ERR; return RESPST_COMPLETE; @@ -389,12 +377,8 @@ static enum resp_states check_resource(struct rxe_qp *qp, if (srq) return get_srq_wqe(qp); - if (qp->is_user) - qp->resp.wqe = queue_head(qp->rq.queue, - QUEUE_TYPE_FROM_USER); - else - qp->resp.wqe = queue_head(qp->rq.queue, - QUEUE_TYPE_KERNEL); + qp->resp.wqe = queue_head(qp->rq.queue, + QUEUE_TYPE_FROM_CLIENT); return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR; } @@ -936,12 +920,8 @@ static enum resp_states do_complete(struct rxe_qp *qp, } /* have copy for srq and reference for !srq */ - if (!qp->srq) { - if (qp->is_user) - advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_USER); - else - advance_consumer(qp->rq.queue, QUEUE_TYPE_KERNEL); - } + if (!qp->srq) + queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT); qp->resp.wqe = NULL; @@ -1213,7 +1193,7 @@ static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify) return; while (!qp->srq && q && queue_head(q, q->type)) - advance_consumer(q, q->type); + queue_advance_consumer(q, q->type); } int rxe_responder(void *arg) diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index 610c98d24b5c..a9e7817e2732 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -93,7 +93,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, spin_lock_init(&srq->rq.producer_lock); spin_lock_init(&srq->rq.consumer_lock); - type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL; + type = QUEUE_TYPE_FROM_CLIENT; q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type); if (!q) { diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 586ef102986e..571cbbeed5d6 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -215,11 +215,7 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) int num_sge = ibwr->num_sge; int full; - if (rq->is_user) - full = queue_full(rq->queue, QUEUE_TYPE_FROM_USER); - else - full = queue_full(rq->queue, QUEUE_TYPE_KERNEL); - + full = queue_full(rq->queue, QUEUE_TYPE_TO_DRIVER); if (unlikely(full)) { err = -ENOMEM; goto err1; @@ -234,11 +230,7 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) for (i = 0; i < num_sge; i++) length += ibwr->sg_list[i].length; - if (rq->is_user) - recv_wqe = producer_addr(rq->queue, QUEUE_TYPE_FROM_USER); - else - recv_wqe = producer_addr(rq->queue, QUEUE_TYPE_KERNEL); - + recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_TO_DRIVER); recv_wqe->wr_id = ibwr->wr_id; recv_wqe->num_sge = num_sge; @@ -251,10 +243,7 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) recv_wqe->dma.cur_sge = 0; recv_wqe->dma.sge_offset = 0; - if (rq->is_user) - advance_producer(rq->queue, QUEUE_TYPE_FROM_USER); - else - advance_producer(rq->queue, QUEUE_TYPE_KERNEL); + queue_advance_producer(rq->queue, QUEUE_TYPE_TO_DRIVER); return 0; @@ -630,27 +619,17 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, spin_lock_irqsave(&qp->sq.sq_lock, flags); - if (qp->is_user) - full = queue_full(sq->queue, QUEUE_TYPE_FROM_USER); - else - full = queue_full(sq->queue, QUEUE_TYPE_KERNEL); + full = queue_full(sq->queue, QUEUE_TYPE_TO_DRIVER); if (unlikely(full)) { spin_unlock_irqrestore(&qp->sq.sq_lock, flags); return -ENOMEM; } - if (qp->is_user) - send_wqe = producer_addr(sq->queue, QUEUE_TYPE_FROM_USER); - else - send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL); - + send_wqe = queue_producer_addr(sq->queue, QUEUE_TYPE_TO_DRIVER); init_send_wqe(qp, ibwr, mask, length, send_wqe); - if (qp->is_user) - advance_producer(sq->queue, QUEUE_TYPE_FROM_USER); - else - advance_producer(sq->queue, QUEUE_TYPE_KERNEL); + queue_advance_producer(sq->queue, QUEUE_TYPE_TO_DRIVER); spin_unlock_irqrestore(&qp->sq.sq_lock, flags); @@ -842,18 +821,12 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) spin_lock_irqsave(&cq->cq_lock, flags); for (i = 0; i < num_entries; i++) { - if (cq->is_user) - cqe = queue_head(cq->queue, QUEUE_TYPE_TO_USER); - else - cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL); + cqe = queue_head(cq->queue, QUEUE_TYPE_FROM_DRIVER); if (!cqe) break; memcpy(wc++, &cqe->ibwc, sizeof(*wc)); - if (cq->is_user) - advance_consumer(cq->queue, QUEUE_TYPE_TO_USER); - else - advance_consumer(cq->queue, QUEUE_TYPE_KERNEL); + queue_advance_consumer(cq->queue, QUEUE_TYPE_FROM_DRIVER); } spin_unlock_irqrestore(&cq->cq_lock, flags); @@ -865,10 +838,7 @@ static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt) struct rxe_cq *cq = to_rcq(ibcq); int count; - if (cq->is_user) - count = queue_count(cq->queue, QUEUE_TYPE_TO_USER); - else - count = queue_count(cq->queue, QUEUE_TYPE_KERNEL); + count = queue_count(cq->queue, QUEUE_TYPE_FROM_DRIVER); return (count > wc_cnt) ? wc_cnt : count; } @@ -884,10 +854,7 @@ static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) if (cq->notify != IB_CQ_NEXT_COMP) cq->notify = flags & IB_CQ_SOLICITED_MASK; - if (cq->is_user) - empty = queue_empty(cq->queue, QUEUE_TYPE_TO_USER); - else - empty = queue_empty(cq->queue, QUEUE_TYPE_KERNEL); + empty = queue_empty(cq->queue, QUEUE_TYPE_FROM_DRIVER); if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty) ret = 1; -- cgit From 47b7f7064b078e7cf3f5a51396d60d8e16a49874 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Tue, 14 Sep 2021 11:42:04 -0500 Subject: RDMA/rxe: Cleanup MR status and type enums Eliminate RXE_MR_STATE_ZOMBIE which is not compatible with IBA. RXE_MR_STATE_INVALID is better. Replace RXE_MR_TYPE_XXX by IB_MR_TYPE_XXX which covers all the needed types. Link: https://lore.kernel.org/r/20210914164206.19768-3-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 20 ++++++++++++-------- drivers/infiniband/sw/rxe/rxe_verbs.h | 9 +-------- 2 files changed, 13 insertions(+), 16 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 5890a8246216..0cc24154762c 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -24,17 +24,22 @@ u8 rxe_get_next_key(u32 last_key) int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) { + + switch (mr->type) { - case RXE_MR_TYPE_DMA: + case IB_MR_TYPE_DMA: return 0; - case RXE_MR_TYPE_MR: + case IB_MR_TYPE_USER: + case IB_MR_TYPE_MEM_REG: if (iova < mr->iova || length > mr->length || iova > mr->iova + mr->length - length) return -EFAULT; return 0; default: + pr_warn("%s: mr type (%d) not supported\n", + __func__, mr->type); return -EFAULT; } } @@ -51,7 +56,6 @@ static void rxe_mr_init(int access, struct rxe_mr *mr) mr->ibmr.lkey = lkey; mr->ibmr.rkey = rkey; mr->state = RXE_MR_STATE_INVALID; - mr->type = RXE_MR_TYPE_NONE; mr->map_shift = ilog2(RXE_BUF_PER_MAP); } @@ -100,7 +104,7 @@ void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr) mr->ibmr.pd = &pd->ibpd; mr->access = access; mr->state = RXE_MR_STATE_VALID; - mr->type = RXE_MR_TYPE_DMA; + mr->type = IB_MR_TYPE_DMA; } int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, @@ -173,7 +177,7 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, mr->va = start; mr->offset = ib_umem_offset(umem); mr->state = RXE_MR_STATE_VALID; - mr->type = RXE_MR_TYPE_MR; + mr->type = IB_MR_TYPE_USER; return 0; @@ -203,7 +207,7 @@ int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr) mr->ibmr.pd = &pd->ibpd; mr->max_buf = max_pages; mr->state = RXE_MR_STATE_FREE; - mr->type = RXE_MR_TYPE_MR; + mr->type = IB_MR_TYPE_MEM_REG; return 0; @@ -302,7 +306,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, if (length == 0) return 0; - if (mr->type == RXE_MR_TYPE_DMA) { + if (mr->type == IB_MR_TYPE_DMA) { u8 *src, *dest; src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova); @@ -564,7 +568,7 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) return -EINVAL; } - mr->state = RXE_MR_STATE_ZOMBIE; + mr->state = RXE_MR_STATE_INVALID; rxe_drop_ref(mr_pd(mr)); rxe_drop_index(mr); rxe_drop_ref(mr); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 1fd53fb3a4b3..f2176a59bd10 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -266,18 +266,11 @@ struct rxe_qp { }; enum rxe_mr_state { - RXE_MR_STATE_ZOMBIE, RXE_MR_STATE_INVALID, RXE_MR_STATE_FREE, RXE_MR_STATE_VALID, }; -enum rxe_mr_type { - RXE_MR_TYPE_NONE, - RXE_MR_TYPE_DMA, - RXE_MR_TYPE_MR, -}; - enum rxe_mr_copy_dir { RXE_TO_MR_OBJ, RXE_FROM_MR_OBJ, @@ -313,7 +306,7 @@ struct rxe_mr { struct ib_umem *umem; enum rxe_mr_state state; - enum rxe_mr_type type; + enum ib_mr_type type; u64 va; u64 iova; size_t length; -- cgit From 001345339f4ca85790a1644a74e33ae77ac116be Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Tue, 14 Sep 2021 11:42:05 -0500 Subject: RDMA/rxe: Separate HW and SW l/rkeys Separate software and simulated hardware lkeys and rkeys for MRs and MWs. This makes struct ib_mr and struct ib_mw isolated from hardware changes triggered by executing work requests. This change fixes a bug seen in blktest. Link: https://lore.kernel.org/r/20210914164206.19768-4-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_loc.h | 1 + drivers/infiniband/sw/rxe/rxe_mr.c | 69 +++++++++++++++++++++++++++++------ drivers/infiniband/sw/rxe/rxe_mw.c | 30 +++++++-------- drivers/infiniband/sw/rxe/rxe_req.c | 14 +++---- drivers/infiniband/sw/rxe/rxe_verbs.h | 18 ++------- 5 files changed, 81 insertions(+), 51 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index f0c954575bde..4fd73b51fabf 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -86,6 +86,7 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key, int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length); int advance_dma_data(struct rxe_dma_info *dma, unsigned int length); int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey); +int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe); int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); void rxe_mr_cleanup(struct rxe_pool_entry *arg); diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 0cc24154762c..370212801abc 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -53,8 +53,14 @@ static void rxe_mr_init(int access, struct rxe_mr *mr) u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1); u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0; - mr->ibmr.lkey = lkey; - mr->ibmr.rkey = rkey; + /* set ibmr->l/rkey and also copy into private l/rkey + * for user MRs these will always be the same + * for cases where caller 'owns' the key portion + * they may be different until REG_MR WQE is executed. + */ + mr->lkey = mr->ibmr.lkey = lkey; + mr->rkey = mr->ibmr.rkey = rkey; + mr->state = RXE_MR_STATE_INVALID; mr->map_shift = ilog2(RXE_BUF_PER_MAP); } @@ -195,10 +201,8 @@ int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr) { int err; - rxe_mr_init(0, mr); - - /* In fastreg, we also set the rkey */ - mr->ibmr.rkey = mr->ibmr.lkey; + /* always allow remote access for FMRs */ + rxe_mr_init(IB_ACCESS_REMOTE, mr); err = rxe_mr_alloc(mr, max_pages); if (err) @@ -511,8 +515,8 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key, if (!mr) return NULL; - if (unlikely((type == RXE_LOOKUP_LOCAL && mr_lkey(mr) != key) || - (type == RXE_LOOKUP_REMOTE && mr_rkey(mr) != key) || + if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) || + (type == RXE_LOOKUP_REMOTE && mr->rkey != key) || mr_pd(mr) != pd || (access && !(access & mr->access)) || mr->state != RXE_MR_STATE_VALID)) { rxe_drop_ref(mr); @@ -535,9 +539,9 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey) goto err; } - if (rkey != mr->ibmr.rkey) { - pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n", - __func__, rkey, mr->ibmr.rkey); + if (rkey != mr->rkey) { + pr_err("%s: rkey (%#x) doesn't match mr->rkey (%#x)\n", + __func__, rkey, mr->rkey); ret = -EINVAL; goto err_drop_ref; } @@ -558,6 +562,49 @@ err: return ret; } +/* user can (re)register fast MR by executing a REG_MR WQE. + * user is expected to hold a reference on the ib mr until the + * WQE completes. + * Once a fast MR is created this is the only way to change the + * private keys. It is the responsibility of the user to maintain + * the ib mr keys in sync with rxe mr keys. + */ +int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr); + u32 key = wqe->wr.wr.reg.key; + u32 access = wqe->wr.wr.reg.access; + + /* user can only register MR in free state */ + if (unlikely(mr->state != RXE_MR_STATE_FREE)) { + pr_warn("%s: mr->lkey = 0x%x not free\n", + __func__, mr->lkey); + return -EINVAL; + } + + /* user can only register mr with qp in same protection domain */ + if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) { + pr_warn("%s: qp->pd and mr->pd don't match\n", + __func__); + return -EINVAL; + } + + /* user is only allowed to change key portion of l/rkey */ + if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) { + pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n", + __func__, key, mr->lkey); + return -EINVAL; + } + + mr->access = access; + mr->lkey = key; + mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0; + mr->iova = wqe->wr.wr.reg.mr->iova; + mr->state = RXE_MR_STATE_VALID; + + return 0; +} + int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct rxe_mr *mr = to_rmr(ibmr); diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c index 5ba77df7598e..a5e2ea7d80f0 100644 --- a/drivers/infiniband/sw/rxe/rxe_mw.c +++ b/drivers/infiniband/sw/rxe/rxe_mw.c @@ -21,7 +21,7 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) } rxe_add_index(mw); - ibmw->rkey = (mw->pelem.index << 8) | rxe_get_next_key(-1); + mw->rkey = ibmw->rkey = (mw->pelem.index << 8) | rxe_get_next_key(-1); mw->state = (mw->ibmw.type == IB_MW_TYPE_2) ? RXE_MW_STATE_FREE : RXE_MW_STATE_VALID; spin_lock_init(&mw->lock); @@ -71,6 +71,8 @@ int rxe_dealloc_mw(struct ib_mw *ibmw) static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, struct rxe_mw *mw, struct rxe_mr *mr) { + u32 key = wqe->wr.wr.mw.rkey & 0xff; + if (mw->ibmw.type == IB_MW_TYPE_1) { if (unlikely(mw->state != RXE_MW_STATE_VALID)) { pr_err_once( @@ -108,7 +110,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, } } - if (unlikely((wqe->wr.wr.mw.rkey & 0xff) == (mw->ibmw.rkey & 0xff))) { + if (unlikely(key == (mw->rkey & 0xff))) { pr_err_once("attempt to bind MW with same key\n"); return -EINVAL; } @@ -161,13 +163,9 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, struct rxe_mw *mw, struct rxe_mr *mr) { - u32 rkey; - u32 new_rkey; - - rkey = mw->ibmw.rkey; - new_rkey = (rkey & 0xffffff00) | (wqe->wr.wr.mw.rkey & 0x000000ff); + u32 key = wqe->wr.wr.mw.rkey & 0xff; - mw->ibmw.rkey = new_rkey; + mw->rkey = (mw->rkey & ~0xff) | key; mw->access = wqe->wr.wr.mw.access; mw->state = RXE_MW_STATE_VALID; mw->addr = wqe->wr.wr.mw.addr; @@ -197,29 +195,29 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe) struct rxe_mw *mw; struct rxe_mr *mr; struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + u32 mw_rkey = wqe->wr.wr.mw.mw_rkey; + u32 mr_lkey = wqe->wr.wr.mw.mr_lkey; unsigned long flags; - mw = rxe_pool_get_index(&rxe->mw_pool, - wqe->wr.wr.mw.mw_rkey >> 8); + mw = rxe_pool_get_index(&rxe->mw_pool, mw_rkey >> 8); if (unlikely(!mw)) { ret = -EINVAL; goto err; } - if (unlikely(mw->ibmw.rkey != wqe->wr.wr.mw.mw_rkey)) { + if (unlikely(mw->rkey != mw_rkey)) { ret = -EINVAL; goto err_drop_mw; } if (likely(wqe->wr.wr.mw.length)) { - mr = rxe_pool_get_index(&rxe->mr_pool, - wqe->wr.wr.mw.mr_lkey >> 8); + mr = rxe_pool_get_index(&rxe->mr_pool, mr_lkey >> 8); if (unlikely(!mr)) { ret = -EINVAL; goto err_drop_mw; } - if (unlikely(mr->ibmr.lkey != wqe->wr.wr.mw.mr_lkey)) { + if (unlikely(mr->lkey != mr_lkey)) { ret = -EINVAL; goto err_drop_mr; } @@ -292,7 +290,7 @@ int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey) goto err; } - if (rkey != mw->ibmw.rkey) { + if (rkey != mw->rkey) { ret = -EINVAL; goto err_drop_ref; } @@ -323,7 +321,7 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey) if (!mw) return NULL; - if (unlikely((rxe_mw_rkey(mw) != rkey) || rxe_mw_pd(mw) != pd || + if (unlikely((mw->rkey != rkey) || rxe_mw_pd(mw) != pd || (mw->ibmw.type == IB_MW_TYPE_2 && mw->qp != qp) || (mw->length == 0) || (access && !(access & mw->access)) || diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 801e36cefc29..2981b3ef3cc0 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -562,7 +562,6 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) { u8 opcode = wqe->wr.opcode; - struct rxe_mr *mr; u32 rkey; int ret; @@ -580,14 +579,11 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) } break; case IB_WR_REG_MR: - mr = to_rmr(wqe->wr.wr.reg.mr); - rxe_add_ref(mr); - mr->state = RXE_MR_STATE_VALID; - mr->access = wqe->wr.wr.reg.access; - mr->ibmr.lkey = wqe->wr.wr.reg.key; - mr->ibmr.rkey = wqe->wr.wr.reg.key; - mr->iova = wqe->wr.wr.reg.mr->iova; - rxe_drop_ref(mr); + ret = rxe_reg_fast_mr(qp, wqe); + if (unlikely(ret)) { + wqe->status = IB_WC_LOC_QP_OP_ERR; + return ret; + } break; case IB_WR_BIND_MW: ret = rxe_bind_mw(qp, wqe); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index f2176a59bd10..b330e56609af 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -305,6 +305,8 @@ struct rxe_mr { struct ib_umem *umem; + u32 lkey; + u32 rkey; enum rxe_mr_state state; enum ib_mr_type type; u64 va; @@ -342,6 +344,7 @@ struct rxe_mw { enum rxe_mw_state state; struct rxe_qp *qp; /* Type 2 only */ struct rxe_mr *mr; + u32 rkey; int access; u64 addr; u64 length; @@ -466,26 +469,11 @@ static inline struct rxe_pd *mr_pd(struct rxe_mr *mr) return to_rpd(mr->ibmr.pd); } -static inline u32 mr_lkey(struct rxe_mr *mr) -{ - return mr->ibmr.lkey; -} - -static inline u32 mr_rkey(struct rxe_mr *mr) -{ - return mr->ibmr.rkey; -} - static inline struct rxe_pd *rxe_mw_pd(struct rxe_mw *mw) { return to_rpd(mw->ibmw.pd); } -static inline u32 rxe_mw_rkey(struct rxe_mw *mw) -{ - return mw->ibmw.rkey; -} - int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name); void rxe_mc_cleanup(struct rxe_pool_entry *arg); -- cgit From 647bf13ce944f20f7402f281578423a952274e4a Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Tue, 14 Sep 2021 11:42:06 -0500 Subject: RDMA/rxe: Create duplicate mapping tables for FMRs For fast memory regions create duplicate mapping tables so ib_map_mr_sg() can build a new mapping table which is then swapped into place synchronously with the execution of an IB_WR_REG_MR work request. Currently the rxe driver uses the same table for receiving RDMA operations and for building new tables in preparation for reusing the MR. This exposes users to potentially incorrect results. Link: https://lore.kernel.org/r/20210914164206.19768-5-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_loc.h | 1 + drivers/infiniband/sw/rxe/rxe_mr.c | 196 +++++++++++++++++++++++----------- drivers/infiniband/sw/rxe/rxe_mw.c | 6 +- drivers/infiniband/sw/rxe/rxe_verbs.c | 39 +++---- drivers/infiniband/sw/rxe/rxe_verbs.h | 21 ++-- 5 files changed, 161 insertions(+), 102 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 4fd73b51fabf..1ca43b859d80 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -87,6 +87,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length); int advance_dma_data(struct rxe_dma_info *dma, unsigned int length); int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey); int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe); +int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr); int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); void rxe_mr_cleanup(struct rxe_pool_entry *arg); diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 370212801abc..8d658d42abed 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -24,7 +24,7 @@ u8 rxe_get_next_key(u32 last_key) int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) { - + struct rxe_map_set *set = mr->cur_map_set; switch (mr->type) { case IB_MR_TYPE_DMA: @@ -32,8 +32,8 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) case IB_MR_TYPE_USER: case IB_MR_TYPE_MEM_REG: - if (iova < mr->iova || length > mr->length || - iova > mr->iova + mr->length - length) + if (iova < set->iova || length > set->length || + iova > set->iova + set->length - length) return -EFAULT; return 0; @@ -65,41 +65,89 @@ static void rxe_mr_init(int access, struct rxe_mr *mr) mr->map_shift = ilog2(RXE_BUF_PER_MAP); } -static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf) +static void rxe_mr_free_map_set(int num_map, struct rxe_map_set *set) { int i; - int num_map; - struct rxe_map **map = mr->map; - num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP; + for (i = 0; i < num_map; i++) + kfree(set->map[i]); - mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL); - if (!mr->map) - goto err1; + kfree(set->map); + kfree(set); +} + +static int rxe_mr_alloc_map_set(int num_map, struct rxe_map_set **setp) +{ + int i; + struct rxe_map_set *set; + + set = kmalloc(sizeof(*set), GFP_KERNEL); + if (!set) + goto err_out; + + set->map = kmalloc_array(num_map, sizeof(struct rxe_map *), GFP_KERNEL); + if (!set->map) + goto err_free_set; for (i = 0; i < num_map; i++) { - mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL); - if (!mr->map[i]) - goto err2; + set->map[i] = kmalloc(sizeof(struct rxe_map), GFP_KERNEL); + if (!set->map[i]) + goto err_free_map; } + *setp = set; + + return 0; + +err_free_map: + for (i--; i >= 0; i--) + kfree(set->map[i]); + + kfree(set->map); +err_free_set: + kfree(set); +err_out: + return -ENOMEM; +} + +/** + * rxe_mr_alloc() - Allocate memory map array(s) for MR + * @mr: Memory region + * @num_buf: Number of buffer descriptors to support + * @both: If non zero allocate both mr->map and mr->next_map + * else just allocate mr->map. Used for fast MRs + * + * Return: 0 on success else an error + */ +static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf, int both) +{ + int ret; + int num_map; + BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP)); + num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP; mr->map_shift = ilog2(RXE_BUF_PER_MAP); mr->map_mask = RXE_BUF_PER_MAP - 1; - mr->num_buf = num_buf; - mr->num_map = num_map; mr->max_buf = num_map * RXE_BUF_PER_MAP; + mr->num_map = num_map; - return 0; + ret = rxe_mr_alloc_map_set(num_map, &mr->cur_map_set); + if (ret) + goto err_out; -err2: - for (i--; i >= 0; i--) - kfree(mr->map[i]); + if (both) { + ret = rxe_mr_alloc_map_set(num_map, &mr->next_map_set); + if (ret) { + rxe_mr_free_map_set(mr->num_map, mr->cur_map_set); + goto err_out; + } + } - kfree(mr->map); -err1: + return 0; + +err_out: return -ENOMEM; } @@ -116,6 +164,7 @@ void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr) int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, int access, struct rxe_mr *mr) { + struct rxe_map_set *set; struct rxe_map **map; struct rxe_phys_buf *buf = NULL; struct ib_umem *umem; @@ -123,7 +172,6 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, int num_buf; void *vaddr; int err; - int i; umem = ib_umem_get(pd->ibpd.device, start, length, access); if (IS_ERR(umem)) { @@ -137,18 +185,20 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, rxe_mr_init(access, mr); - err = rxe_mr_alloc(mr, num_buf); + err = rxe_mr_alloc(mr, num_buf, 0); if (err) { pr_warn("%s: Unable to allocate memory for map\n", __func__); goto err_release_umem; } - mr->page_shift = PAGE_SHIFT; - mr->page_mask = PAGE_SIZE - 1; + set = mr->cur_map_set; + set->page_shift = PAGE_SHIFT; + set->page_mask = PAGE_SIZE - 1; + + num_buf = 0; + map = set->map; - num_buf = 0; - map = mr->map; if (length > 0) { buf = map[0]->buf; @@ -171,26 +221,24 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, buf->size = PAGE_SIZE; num_buf++; buf++; - } } mr->ibmr.pd = &pd->ibpd; mr->umem = umem; mr->access = access; - mr->length = length; - mr->iova = iova; - mr->va = start; - mr->offset = ib_umem_offset(umem); mr->state = RXE_MR_STATE_VALID; mr->type = IB_MR_TYPE_USER; + set->length = length; + set->iova = iova; + set->va = start; + set->offset = ib_umem_offset(umem); + return 0; err_cleanup_map: - for (i = 0; i < mr->num_map; i++) - kfree(mr->map[i]); - kfree(mr->map); + rxe_mr_free_map_set(mr->num_map, mr->cur_map_set); err_release_umem: ib_umem_release(umem); err_out: @@ -204,7 +252,7 @@ int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr) /* always allow remote access for FMRs */ rxe_mr_init(IB_ACCESS_REMOTE, mr); - err = rxe_mr_alloc(mr, max_pages); + err = rxe_mr_alloc(mr, max_pages, 1); if (err) goto err1; @@ -222,21 +270,24 @@ err1: static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out, size_t *offset_out) { - size_t offset = iova - mr->iova + mr->offset; + struct rxe_map_set *set = mr->cur_map_set; + size_t offset = iova - set->iova + set->offset; int map_index; int buf_index; u64 length; + struct rxe_map *map; - if (likely(mr->page_shift)) { - *offset_out = offset & mr->page_mask; - offset >>= mr->page_shift; + if (likely(set->page_shift)) { + *offset_out = offset & set->page_mask; + offset >>= set->page_shift; *n_out = offset & mr->map_mask; *m_out = offset >> mr->map_shift; } else { map_index = 0; buf_index = 0; - length = mr->map[map_index]->buf[buf_index].size; + map = set->map[map_index]; + length = map->buf[buf_index].size; while (offset >= length) { offset -= length; @@ -246,7 +297,8 @@ static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out, map_index++; buf_index = 0; } - length = mr->map[map_index]->buf[buf_index].size; + map = set->map[map_index]; + length = map->buf[buf_index].size; } *m_out = map_index; @@ -267,7 +319,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length) goto out; } - if (!mr->map) { + if (!mr->cur_map_set) { addr = (void *)(uintptr_t)iova; goto out; } @@ -280,13 +332,13 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length) lookup_iova(mr, iova, &m, &n, &offset); - if (offset + length > mr->map[m]->buf[n].size) { + if (offset + length > mr->cur_map_set->map[m]->buf[n].size) { pr_warn("crosses page boundary\n"); addr = NULL; goto out; } - addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset; + addr = (void *)(uintptr_t)mr->cur_map_set->map[m]->buf[n].addr + offset; out: return addr; @@ -322,7 +374,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, return 0; } - WARN_ON_ONCE(!mr->map); + WARN_ON_ONCE(!mr->cur_map_set); err = mr_check_range(mr, iova, length); if (err) { @@ -332,7 +384,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, lookup_iova(mr, iova, &m, &i, &offset); - map = mr->map + m; + map = mr->cur_map_set->map + m; buf = map[0]->buf + i; while (length > 0) { @@ -572,8 +624,9 @@ err: int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe) { struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr); - u32 key = wqe->wr.wr.reg.key; + u32 key = wqe->wr.wr.reg.key & 0xff; u32 access = wqe->wr.wr.reg.access; + struct rxe_map_set *set; /* user can only register MR in free state */ if (unlikely(mr->state != RXE_MR_STATE_FREE)) { @@ -589,19 +642,36 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe) return -EINVAL; } - /* user is only allowed to change key portion of l/rkey */ - if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) { - pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n", - __func__, key, mr->lkey); - return -EINVAL; - } - mr->access = access; - mr->lkey = key; - mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0; - mr->iova = wqe->wr.wr.reg.mr->iova; + mr->lkey = (mr->lkey & ~0xff) | key; + mr->rkey = (access & IB_ACCESS_REMOTE) ? mr->lkey : 0; mr->state = RXE_MR_STATE_VALID; + set = mr->cur_map_set; + mr->cur_map_set = mr->next_map_set; + mr->cur_map_set->iova = wqe->wr.wr.reg.mr->iova; + mr->next_map_set = set; + + return 0; +} + +int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct rxe_mr *mr = to_rmr(ibmr); + struct rxe_map_set *set = mr->next_map_set; + struct rxe_map *map; + struct rxe_phys_buf *buf; + + if (unlikely(set->nbuf == mr->num_buf)) + return -ENOMEM; + + map = set->map[set->nbuf / RXE_BUF_PER_MAP]; + buf = &map->buf[set->nbuf % RXE_BUF_PER_MAP]; + + buf->addr = addr; + buf->size = ibmr->page_size; + set->nbuf++; + return 0; } @@ -626,14 +696,12 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) void rxe_mr_cleanup(struct rxe_pool_entry *arg) { struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem); - int i; ib_umem_release(mr->umem); - if (mr->map) { - for (i = 0; i < mr->num_map; i++) - kfree(mr->map[i]); + if (mr->cur_map_set) + rxe_mr_free_map_set(mr->num_map, mr->cur_map_set); - kfree(mr->map); - } + if (mr->next_map_set) + rxe_mr_free_map_set(mr->num_map, mr->next_map_set); } diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c index a5e2ea7d80f0..9534a7fe1a98 100644 --- a/drivers/infiniband/sw/rxe/rxe_mw.c +++ b/drivers/infiniband/sw/rxe/rxe_mw.c @@ -142,15 +142,15 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, /* C10-75 */ if (mw->access & IB_ZERO_BASED) { - if (unlikely(wqe->wr.wr.mw.length > mr->length)) { + if (unlikely(wqe->wr.wr.mw.length > mr->cur_map_set->length)) { pr_err_once( "attempt to bind a ZB MW outside of the MR\n"); return -EINVAL; } } else { - if (unlikely((wqe->wr.wr.mw.addr < mr->iova) || + if (unlikely((wqe->wr.wr.mw.addr < mr->cur_map_set->iova) || ((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) > - (mr->iova + mr->length)))) { + (mr->cur_map_set->iova + mr->cur_map_set->length)))) { pr_err_once( "attempt to bind a VA MW outside of the MR\n"); return -EINVAL; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 571cbbeed5d6..9d0bb9aa7514 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -951,41 +951,26 @@ err1: return ERR_PTR(err); } -static int rxe_set_page(struct ib_mr *ibmr, u64 addr) -{ - struct rxe_mr *mr = to_rmr(ibmr); - struct rxe_map *map; - struct rxe_phys_buf *buf; - - if (unlikely(mr->nbuf == mr->num_buf)) - return -ENOMEM; - - map = mr->map[mr->nbuf / RXE_BUF_PER_MAP]; - buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP]; - - buf->addr = addr; - buf->size = ibmr->page_size; - mr->nbuf++; - - return 0; -} - +/* build next_map_set from scatterlist + * The IB_WR_REG_MR WR will swap map_sets + */ static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { struct rxe_mr *mr = to_rmr(ibmr); + struct rxe_map_set *set = mr->next_map_set; int n; - mr->nbuf = 0; + set->nbuf = 0; - n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page); + n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_mr_set_page); - mr->va = ibmr->iova; - mr->iova = ibmr->iova; - mr->length = ibmr->length; - mr->page_shift = ilog2(ibmr->page_size); - mr->page_mask = ibmr->page_size - 1; - mr->offset = mr->iova & mr->page_mask; + set->va = ibmr->iova; + set->iova = ibmr->iova; + set->length = ibmr->length; + set->page_shift = ilog2(ibmr->page_size); + set->page_mask = ibmr->page_size - 1; + set->offset = set->iova & set->page_mask; return n; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index b330e56609af..c807639435eb 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -292,6 +292,17 @@ struct rxe_map { struct rxe_phys_buf buf[RXE_BUF_PER_MAP]; }; +struct rxe_map_set { + struct rxe_map **map; + u64 va; + u64 iova; + size_t length; + u32 offset; + u32 nbuf; + int page_shift; + int page_mask; +}; + static inline int rkey_is_mw(u32 rkey) { u32 index = rkey >> 8; @@ -309,26 +320,20 @@ struct rxe_mr { u32 rkey; enum rxe_mr_state state; enum ib_mr_type type; - u64 va; - u64 iova; - size_t length; - u32 offset; int access; - int page_shift; - int page_mask; int map_shift; int map_mask; u32 num_buf; - u32 nbuf; u32 max_buf; u32 num_map; atomic_t num_mw; - struct rxe_map **map; + struct rxe_map_set *cur_map_set; + struct rxe_map_set *next_map_set; }; enum rxe_mw_state { -- cgit From 450f4f6aa1a369cc3ffadc1c7e27dfab3e90199f Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Tue, 14 Sep 2021 11:42:07 -0500 Subject: RDMA/rxe: Only allow invalidate for appropriate MRs Local and remote invalidate operations are not allowed by IBA for MRs created by (re)register memory verbs. This patch checks the MR type in rxe_invalidate_mr(). Link: https://lore.kernel.org/r/20210914164206.19768-6-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 8d658d42abed..53271df10e47 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -605,6 +605,12 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey) goto err_drop_ref; } + if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) { + pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type); + ret = -EINVAL; + goto err_drop_ref; + } + mr->state = RXE_MR_STATE_FREE; ret = 0; -- cgit From d47dfc2b00e69001c8eeae71f7e25066ccc36144 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 13 Sep 2021 09:28:26 -0400 Subject: IB/hfi1: Remove cache and embed txreq in ring This patch removes kmem cache allocation and deallocation in favor of having the ipoib_txreq in the ring. The consumer is now the packet sending side allocating tx descriptors from ring and the producer is the napi interrupt handling freeing tx descriptors. The locks are now eliminated because the napi tx lock insures a single consumer and the napi handling insures a single producer. The napi poll is converted to memory poll looking for items that have been marked completed. Fixes: d99dc602e2a5 ("IB/hfi1: Add functions to transmit datagram ipoib packets") Link: https://lore.kernel.org/r/20210913132826.131370.4397.stgit@awfm-01.cornelisnetworks.com Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/ipoib.h | 17 ++- drivers/infiniband/hw/hfi1/ipoib_tx.c | 212 +++++++++++++++------------------- 2 files changed, 101 insertions(+), 128 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h index 2cff38b105ac..4e91b7039293 100644 --- a/drivers/infiniband/hw/hfi1/ipoib.h +++ b/drivers/infiniband/hw/hfi1/ipoib.h @@ -45,21 +45,19 @@ union hfi1_ipoib_flow { /** * struct hfi1_ipoib_circ_buf - List of items to be processed - * @items: ring of items + * @items: ring of items each a power of two size * @head: ring head * @tail: ring tail * @max_items: max items + 1 that the ring can contain - * @producer_lock: producer sync lock - * @consumer_lock: consumer sync lock + * @shift: log2 of size for getting txreq */ struct ipoib_txreq; struct hfi1_ipoib_circ_buf { - struct ipoib_txreq **items; - unsigned long head; - unsigned long tail; - unsigned long max_items; - spinlock_t producer_lock; /* head sync lock */ - spinlock_t consumer_lock; /* tail sync lock */ + void *items; + u32 head; + u32 tail; + u32 max_items; + u32 shift; }; /** @@ -102,7 +100,6 @@ struct hfi1_ipoib_dev_priv { struct net_device *netdev; struct ib_device *device; struct hfi1_ipoib_txq *txqs; - struct kmem_cache *txreq_cache; struct napi_struct *tx_napis; u16 pkey; u16 pkey_index; diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c index e74ddbe46589..0a5d32721dfe 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_tx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -27,6 +27,7 @@ * @txreq: sdma transmit request * @sdma_hdr: 9b ib headers * @sdma_status: status returned by sdma engine + * @complete: non-zero implies complete * @priv: ipoib netdev private data * @txq: txq on which skb was output * @skb: skb to send @@ -35,6 +36,7 @@ struct ipoib_txreq { struct sdma_txreq txreq; struct hfi1_sdma_header sdma_hdr; int sdma_status; + int complete; struct hfi1_ipoib_dev_priv *priv; struct hfi1_ipoib_txq *txq; struct sk_buff *skb; @@ -51,7 +53,13 @@ struct ipoib_txparms { u8 entropy; }; -static u64 hfi1_ipoib_txreqs(const u64 sent, const u64 completed) +static struct ipoib_txreq * +hfi1_txreq_from_idx(struct hfi1_ipoib_circ_buf *r, u32 idx) +{ + return (struct ipoib_txreq *)(r->items + (idx << r->shift)); +} + +static u32 hfi1_ipoib_txreqs(const u64 sent, const u64 completed) { return sent - completed; } @@ -139,51 +147,55 @@ static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget) } napi_consume_skb(tx->skb, budget); + tx->skb = NULL; sdma_txclean(priv->dd, &tx->txreq); - kmem_cache_free(priv->txreq_cache, tx); } -static int hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq, int budget) +static void hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq) { struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring; - unsigned long head; - unsigned long tail; - unsigned int max_tx; - int work_done; - int tx_count; - - spin_lock_bh(&tx_ring->consumer_lock); - - /* Read index before reading contents at that index. */ - head = smp_load_acquire(&tx_ring->head); - tail = tx_ring->tail; - max_tx = tx_ring->max_items; - - work_done = min_t(int, CIRC_CNT(head, tail, max_tx), budget); + int i; + struct ipoib_txreq *tx; - for (tx_count = work_done; tx_count; tx_count--) { - hfi1_ipoib_free_tx(tx_ring->items[tail], budget); - tail = CIRC_NEXT(tail, max_tx); + for (i = 0; i < tx_ring->max_items; i++) { + tx = hfi1_txreq_from_idx(tx_ring, i); + tx->complete = 0; + dev_kfree_skb_any(tx->skb); + tx->skb = NULL; + sdma_txclean(txq->priv->dd, &tx->txreq); } - - atomic64_add(work_done, &txq->complete_txreqs); - - /* Finished freeing tx items so store the tail value. */ - smp_store_release(&tx_ring->tail, tail); - - spin_unlock_bh(&tx_ring->consumer_lock); - - hfi1_ipoib_check_queue_stopped(txq); - - return work_done; + tx_ring->head = 0; + tx_ring->tail = 0; + atomic64_set(&txq->complete_txreqs, 0); + txq->sent_txreqs = 0; } -static int hfi1_ipoib_process_tx_ring(struct napi_struct *napi, int budget) +static int hfi1_ipoib_poll_tx_ring(struct napi_struct *napi, int budget) { struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(napi->dev); struct hfi1_ipoib_txq *txq = &priv->txqs[napi - priv->tx_napis]; + struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring; + u32 head = tx_ring->head; + u32 max_tx = tx_ring->max_items; + int work_done; + struct ipoib_txreq *tx = hfi1_txreq_from_idx(tx_ring, head); - int work_done = hfi1_ipoib_drain_tx_ring(txq, budget); + trace_hfi1_txq_poll(txq); + for (work_done = 0; work_done < budget; work_done++) { + /* See hfi1_ipoib_sdma_complete() */ + if (!smp_load_acquire(&tx->complete)) + break; + tx->complete = 0; + hfi1_ipoib_free_tx(tx, budget); + head = CIRC_NEXT(head, max_tx); + tx = hfi1_txreq_from_idx(tx_ring, head); + } + atomic64_add(work_done, &txq->complete_txreqs); + + /* Finished freeing tx items so store the head value. */ + smp_store_release(&tx_ring->head, head); + + hfi1_ipoib_check_queue_stopped(txq); if (work_done < budget) napi_complete_done(napi, work_done); @@ -191,45 +203,15 @@ static int hfi1_ipoib_process_tx_ring(struct napi_struct *napi, int budget) return work_done; } -static void hfi1_ipoib_add_tx(struct ipoib_txreq *tx) -{ - struct hfi1_ipoib_circ_buf *tx_ring = &tx->txq->tx_ring; - unsigned long head; - unsigned long tail; - size_t max_tx; - - spin_lock(&tx_ring->producer_lock); - - head = tx_ring->head; - tail = READ_ONCE(tx_ring->tail); - max_tx = tx_ring->max_items; - - if (likely(CIRC_SPACE(head, tail, max_tx))) { - tx_ring->items[head] = tx; - - /* Finish storing txreq before incrementing head. */ - smp_store_release(&tx_ring->head, CIRC_ADD(head, 1, max_tx)); - napi_schedule_irqoff(tx->txq->napi); - } else { - struct hfi1_ipoib_txq *txq = tx->txq; - struct hfi1_ipoib_dev_priv *priv = tx->priv; - - /* Ring was full */ - hfi1_ipoib_free_tx(tx, 0); - atomic64_inc(&txq->complete_txreqs); - dd_dev_dbg(priv->dd, "txq %d full.\n", txq->q_idx); - } - - spin_unlock(&tx_ring->producer_lock); -} - static void hfi1_ipoib_sdma_complete(struct sdma_txreq *txreq, int status) { struct ipoib_txreq *tx = container_of(txreq, struct ipoib_txreq, txreq); + trace_hfi1_txq_complete(tx->txq); tx->sdma_status = status; - - hfi1_ipoib_add_tx(tx); + /* see hfi1_ipoib_poll_tx_ring */ + smp_store_release(&tx->complete, 1); + napi_schedule_irqoff(tx->txq->napi); } static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx, @@ -385,19 +367,24 @@ static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev, struct ipoib_txparms *txp) { struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + struct hfi1_ipoib_txq *txq = txp->txq; struct ipoib_txreq *tx; + struct hfi1_ipoib_circ_buf *tx_ring; + u32 tail; int ret; - tx = kmem_cache_alloc_node(priv->txreq_cache, - GFP_ATOMIC, - priv->dd->node); - if (unlikely(!tx)) + if (unlikely(hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq))) + /* This shouldn't happen with a stopped queue */ return ERR_PTR(-ENOMEM); + tx_ring = &txq->tx_ring; + tail = tx_ring->tail; + tx = hfi1_txreq_from_idx(tx_ring, tx_ring->tail); + trace_hfi1_txq_alloc_tx(txq); /* so that we can test if the sdma descriptors are there */ tx->txreq.num_desc = 0; tx->priv = priv; - tx->txq = txp->txq; + tx->txq = txq; tx->skb = skb; INIT_LIST_HEAD(&tx->txreq.list); @@ -405,21 +392,20 @@ static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev, ret = hfi1_ipoib_build_tx_desc(tx, txp); if (likely(!ret)) { - if (txp->txq->flow.as_int != txp->flow.as_int) { - txp->txq->flow.tx_queue = txp->flow.tx_queue; - txp->txq->flow.sc5 = txp->flow.sc5; - txp->txq->sde = + if (txq->flow.as_int != txp->flow.as_int) { + txq->flow.tx_queue = txp->flow.tx_queue; + txq->flow.sc5 = txp->flow.sc5; + txq->sde = sdma_select_engine_sc(priv->dd, txp->flow.tx_queue, txp->flow.sc5); - trace_hfi1_flow_switch(txp->txq); + trace_hfi1_flow_switch(txq); } return tx; } sdma_txclean(priv->dd, &tx->txreq); - kmem_cache_free(priv->txreq_cache, tx); return ERR_PTR(ret); } @@ -480,8 +466,8 @@ static int hfi1_ipoib_send_dma_single(struct net_device *dev, struct sk_buff *skb, struct ipoib_txparms *txp) { - struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); struct hfi1_ipoib_txq *txq = txp->txq; + struct hfi1_ipoib_circ_buf *tx_ring; struct ipoib_txreq *tx; int ret; @@ -499,6 +485,9 @@ static int hfi1_ipoib_send_dma_single(struct net_device *dev, return NETDEV_TX_OK; } + tx_ring = &txq->tx_ring; + /* consume tx */ + smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items)); ret = hfi1_ipoib_submit_tx(txq, tx); if (likely(!ret)) { tx_ok: @@ -514,9 +503,10 @@ tx_ok: if (ret == -EBUSY || ret == -ECOMM) goto tx_ok; - sdma_txclean(priv->dd, &tx->txreq); - dev_kfree_skb_any(skb); - kmem_cache_free(priv->txreq_cache, tx); + /* mark complete and kick napi tx */ + smp_store_release(&tx->complete, 1); + napi_schedule(tx->txq->napi); + ++dev->stats.tx_carrier_errors; return NETDEV_TX_OK; @@ -527,6 +517,7 @@ static int hfi1_ipoib_send_dma_list(struct net_device *dev, struct ipoib_txparms *txp) { struct hfi1_ipoib_txq *txq = txp->txq; + struct hfi1_ipoib_circ_buf *tx_ring; struct ipoib_txreq *tx; /* Has the flow change ? */ @@ -556,6 +547,9 @@ static int hfi1_ipoib_send_dma_list(struct net_device *dev, return NETDEV_TX_OK; } + tx_ring = &txq->tx_ring; + /* consume tx */ + smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items)); list_add_tail(&tx->txreq.list, &txq->tx_list); hfi1_ipoib_check_queue_depth(txq); @@ -696,31 +690,22 @@ static void hfi1_ipoib_flush_txq(struct work_struct *work) int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) { struct net_device *dev = priv->netdev; - char buf[HFI1_IPOIB_TXREQ_NAME_LEN]; - unsigned long tx_ring_size; + u32 tx_ring_size, tx_item_size; int i; - /* - * Ring holds 1 less than tx_ring_size - * Round up to next power of 2 in order to hold at least tx_queue_len - */ - tx_ring_size = roundup_pow_of_two((unsigned long)dev->tx_queue_len + 1); - - snprintf(buf, sizeof(buf), "hfi1_%u_ipoib_txreq_cache", priv->dd->unit); - priv->txreq_cache = kmem_cache_create(buf, - sizeof(struct ipoib_txreq), - 0, - 0, - NULL); - if (!priv->txreq_cache) - return -ENOMEM; - priv->tx_napis = kcalloc_node(dev->num_tx_queues, sizeof(struct napi_struct), GFP_KERNEL, priv->dd->node); if (!priv->tx_napis) - goto free_txreq_cache; + return -ENOMEM; + + /* + * Ring holds 1 less than tx_ring_size + * Round up to next power of 2 in order to hold at least tx_queue_len + */ + tx_ring_size = roundup_pow_of_two(dev->tx_queue_len + 1); + tx_item_size = roundup_pow_of_two(sizeof(struct ipoib_txreq)); priv->txqs = kcalloc_node(dev->num_tx_queues, sizeof(struct hfi1_ipoib_txq), @@ -756,19 +741,17 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) priv->dd->node); txq->tx_ring.items = - kcalloc_node(tx_ring_size, - sizeof(struct ipoib_txreq *), + kcalloc_node(tx_ring_size, tx_item_size, GFP_KERNEL, priv->dd->node); if (!txq->tx_ring.items) goto free_txqs; - spin_lock_init(&txq->tx_ring.producer_lock); - spin_lock_init(&txq->tx_ring.consumer_lock); txq->tx_ring.max_items = tx_ring_size; + txq->tx_ring.shift = ilog2(tx_ring_size); txq->napi = &priv->tx_napis[i]; netif_tx_napi_add(dev, txq->napi, - hfi1_ipoib_process_tx_ring, + hfi1_ipoib_poll_tx_ring, NAPI_POLL_WEIGHT); } @@ -788,10 +771,6 @@ free_txqs: free_tx_napis: kfree(priv->tx_napis); priv->tx_napis = NULL; - -free_txreq_cache: - kmem_cache_destroy(priv->txreq_cache); - priv->txreq_cache = NULL; return -ENOMEM; } @@ -808,13 +787,13 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq) list_del(&txreq->list); sdma_txclean(txq->priv->dd, &tx->txreq); dev_kfree_skb_any(tx->skb); - kmem_cache_free(txq->priv->txreq_cache, tx); + tx->skb = NULL; atomic64_inc(complete_txreqs); } if (hfi1_ipoib_used(txq)) dd_dev_warn(txq->priv->dd, - "txq %d not empty found %llu requests\n", + "txq %d not empty found %u requests\n", txq->q_idx, hfi1_ipoib_txreqs(txq->sent_txreqs, atomic64_read(complete_txreqs))); @@ -831,7 +810,7 @@ void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv) iowait_sdma_drain(&txq->wait); hfi1_ipoib_drain_tx_list(txq); netif_napi_del(txq->napi); - (void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items); + hfi1_ipoib_drain_tx_ring(txq); kfree(txq->tx_ring.items); } @@ -840,9 +819,6 @@ void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv) kfree(priv->tx_napis); priv->tx_napis = NULL; - - kmem_cache_destroy(priv->txreq_cache); - priv->txreq_cache = NULL; } void hfi1_ipoib_napi_tx_enable(struct net_device *dev) @@ -866,7 +842,7 @@ void hfi1_ipoib_napi_tx_disable(struct net_device *dev) struct hfi1_ipoib_txq *txq = &priv->txqs[i]; napi_disable(txq->napi); - (void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items); + hfi1_ipoib_drain_tx_ring(txq); } } @@ -888,9 +864,9 @@ void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q) dd_dev_info(priv->dd, "flow %x\n", txq->flow.as_int); dd_dev_info(priv->dd, "sent %llu completed %llu used %llu\n", txq->sent_txreqs, completed, hfi1_ipoib_used(txq)); - dd_dev_info(priv->dd, "tx_queue_len %u max_items %lu\n", + dd_dev_info(priv->dd, "tx_queue_len %u max_items %u\n", dev->tx_queue_len, txq->tx_ring.max_items); - dd_dev_info(priv->dd, "head %lu tail %lu\n", + dd_dev_info(priv->dd, "head %u tail %u\n", txq->tx_ring.head, txq->tx_ring.tail); dd_dev_info(priv->dd, "wait queued %u\n", !list_empty(&txq->wait.list)); -- cgit From 4bf0ca0c9f77b064ece279ff83b6d7eb679551fb Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 13 Sep 2021 09:28:31 -0400 Subject: IB/hfi1: Get rid of hot path divide The pointer math in this statemet does a divide; struct hfi1_ipoib_txq *txq = &priv->txqs[napi - priv->tx_napis]; Elminate the divide by embedding the struct napi_strut in the txq and getting the txq with a container_of() using the newly embedded napi. Fixes: d99dc602e2a5 ("IB/hfi1: Add functions to transmit datagram ipoib packets") Link: https://lore.kernel.org/r/20210913132831.131370.3993.stgit@awfm-01.cornelisnetworks.com Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/ipoib.h | 3 +-- drivers/infiniband/hw/hfi1/ipoib_tx.c | 35 ++++++++++------------------------- 2 files changed, 11 insertions(+), 27 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h index 4e91b7039293..71b102d2f7b6 100644 --- a/drivers/infiniband/hw/hfi1/ipoib.h +++ b/drivers/infiniband/hw/hfi1/ipoib.h @@ -78,6 +78,7 @@ struct hfi1_ipoib_circ_buf { * @tx_ring: ring of ipoib txreqs to be reaped by napi callback */ struct hfi1_ipoib_txq { + struct napi_struct napi; struct hfi1_ipoib_dev_priv *priv; struct sdma_engine *sde; struct list_head tx_list; @@ -91,7 +92,6 @@ struct hfi1_ipoib_txq { struct iowait wait; atomic64_t ____cacheline_aligned_in_smp complete_txreqs; - struct napi_struct *napi; struct hfi1_ipoib_circ_buf tx_ring; }; @@ -100,7 +100,6 @@ struct hfi1_ipoib_dev_priv { struct net_device *netdev; struct ib_device *device; struct hfi1_ipoib_txq *txqs; - struct napi_struct *tx_napis; u16 pkey; u16 pkey_index; u32 qkey; diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c index 0a5d32721dfe..053eb43c800f 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_tx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -172,8 +172,8 @@ static void hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq) static int hfi1_ipoib_poll_tx_ring(struct napi_struct *napi, int budget) { - struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(napi->dev); - struct hfi1_ipoib_txq *txq = &priv->txqs[napi - priv->tx_napis]; + struct hfi1_ipoib_txq *txq = + container_of(napi, struct hfi1_ipoib_txq, napi); struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring; u32 head = tx_ring->head; u32 max_tx = tx_ring->max_items; @@ -211,7 +211,7 @@ static void hfi1_ipoib_sdma_complete(struct sdma_txreq *txreq, int status) tx->sdma_status = status; /* see hfi1_ipoib_poll_tx_ring */ smp_store_release(&tx->complete, 1); - napi_schedule_irqoff(tx->txq->napi); + napi_schedule_irqoff(&tx->txq->napi); } static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx, @@ -505,7 +505,7 @@ tx_ok: /* mark complete and kick napi tx */ smp_store_release(&tx->complete, 1); - napi_schedule(tx->txq->napi); + napi_schedule(&tx->txq->napi); ++dev->stats.tx_carrier_errors; @@ -693,13 +693,6 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) u32 tx_ring_size, tx_item_size; int i; - priv->tx_napis = kcalloc_node(dev->num_tx_queues, - sizeof(struct napi_struct), - GFP_KERNEL, - priv->dd->node); - if (!priv->tx_napis) - return -ENOMEM; - /* * Ring holds 1 less than tx_ring_size * Round up to next power of 2 in order to hold at least tx_queue_len @@ -712,7 +705,7 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) GFP_KERNEL, priv->dd->node); if (!priv->txqs) - goto free_tx_napis; + return -ENOMEM; for (i = 0; i < dev->num_tx_queues; i++) { struct hfi1_ipoib_txq *txq = &priv->txqs[i]; @@ -749,8 +742,7 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) txq->tx_ring.max_items = tx_ring_size; txq->tx_ring.shift = ilog2(tx_ring_size); - txq->napi = &priv->tx_napis[i]; - netif_tx_napi_add(dev, txq->napi, + netif_tx_napi_add(dev, &txq->napi, hfi1_ipoib_poll_tx_ring, NAPI_POLL_WEIGHT); } @@ -761,16 +753,12 @@ free_txqs: for (i--; i >= 0; i--) { struct hfi1_ipoib_txq *txq = &priv->txqs[i]; - netif_napi_del(txq->napi); + netif_napi_del(&txq->napi); kfree(txq->tx_ring.items); } kfree(priv->txqs); priv->txqs = NULL; - -free_tx_napis: - kfree(priv->tx_napis); - priv->tx_napis = NULL; return -ENOMEM; } @@ -809,16 +797,13 @@ void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv) iowait_cancel_work(&txq->wait); iowait_sdma_drain(&txq->wait); hfi1_ipoib_drain_tx_list(txq); - netif_napi_del(txq->napi); + netif_napi_del(&txq->napi); hfi1_ipoib_drain_tx_ring(txq); kfree(txq->tx_ring.items); } kfree(priv->txqs); priv->txqs = NULL; - - kfree(priv->tx_napis); - priv->tx_napis = NULL; } void hfi1_ipoib_napi_tx_enable(struct net_device *dev) @@ -829,7 +814,7 @@ void hfi1_ipoib_napi_tx_enable(struct net_device *dev) for (i = 0; i < dev->num_tx_queues; i++) { struct hfi1_ipoib_txq *txq = &priv->txqs[i]; - napi_enable(txq->napi); + napi_enable(&txq->napi); } } @@ -841,7 +826,7 @@ void hfi1_ipoib_napi_tx_disable(struct net_device *dev) for (i = 0; i < dev->num_tx_queues; i++) { struct hfi1_ipoib_txq *txq = &priv->txqs[i]; - napi_disable(txq->napi); + napi_disable(&txq->napi); hfi1_ipoib_drain_tx_ring(txq); } } -- cgit From a7125869b2c3f495666bccb7c58567eddffaef59 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 13 Sep 2021 09:28:37 -0400 Subject: IB/hfi1: Get rid of tx priv backpointer The txq has the backpointer, so this is a micro optimization for the tx path. Fixes: d99dc602e2a5 ("IB/hfi1: Add functions to transmit datagram ipoib packets") Link: https://lore.kernel.org/r/20210913132836.131370.89704.stgit@awfm-01.cornelisnetworks.com Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/ipoib_tx.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c index 053eb43c800f..734b91d31939 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_tx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -133,7 +133,7 @@ static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq) static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget) { - struct hfi1_ipoib_dev_priv *priv = tx->priv; + struct hfi1_ipoib_dev_priv *priv = tx->txq->priv; if (likely(!tx->sdma_status)) { dev_sw_netstats_tx_add(priv->netdev, 1, tx->skb->len); @@ -273,7 +273,7 @@ static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx, static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx, struct ipoib_txparms *txp) { - struct hfi1_ipoib_dev_priv *priv = tx->priv; + struct hfi1_ipoib_dev_priv *priv = tx->txq->priv; struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr; struct sk_buff *skb = tx->skb; struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp); @@ -383,7 +383,6 @@ static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev, /* so that we can test if the sdma descriptors are there */ tx->txreq.num_desc = 0; - tx->priv = priv; tx->txq = txq; tx->skb = skb; INIT_LIST_HEAD(&tx->txreq.list); @@ -491,7 +490,7 @@ static int hfi1_ipoib_send_dma_single(struct net_device *dev, ret = hfi1_ipoib_submit_tx(txq, tx); if (likely(!ret)) { tx_ok: - trace_sdma_output_ibhdr(tx->priv->dd, + trace_sdma_output_ibhdr(txq->priv->dd, &tx->sdma_hdr.hdr, ib_is_sc5(txp->flow.sc5)); hfi1_ipoib_check_queue_depth(txq); @@ -554,7 +553,7 @@ static int hfi1_ipoib_send_dma_list(struct net_device *dev, hfi1_ipoib_check_queue_depth(txq); - trace_sdma_output_ibhdr(tx->priv->dd, + trace_sdma_output_ibhdr(txq->priv->dd, &tx->sdma_hdr.hdr, ib_is_sc5(txp->flow.sc5)); -- cgit From f5dc70a0e1420fa95c9f3f0dd65e27a89a5c787d Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 13 Sep 2021 09:28:42 -0400 Subject: IB/hfi1: Tune netdev xmit cachelines This patch moves fields in the ring and creates a line for the producer and the consumer. The adds a consumer side variable that tracks the ring avail so that the code doesn't have the read the other cacheline to get a count for every packet. A read now only occurs when the avail is at 0. Fixes: d99dc602e2a5 ("IB/hfi1: Add functions to transmit datagram ipoib packets") Link: https://lore.kernel.org/r/20210913132842.131370.15636.stgit@awfm-01.cornelisnetworks.com Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/ipoib.h | 39 ++++++++++--------- drivers/infiniband/hw/hfi1/ipoib_tx.c | 71 ++++++++++++++++++++--------------- drivers/infiniband/hw/hfi1/trace_tx.h | 8 ++-- 3 files changed, 66 insertions(+), 52 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h index 71b102d2f7b6..8d9a03a5368a 100644 --- a/drivers/infiniband/hw/hfi1/ipoib.h +++ b/drivers/infiniband/hw/hfi1/ipoib.h @@ -46,18 +46,31 @@ union hfi1_ipoib_flow { /** * struct hfi1_ipoib_circ_buf - List of items to be processed * @items: ring of items each a power of two size - * @head: ring head - * @tail: ring tail * @max_items: max items + 1 that the ring can contain * @shift: log2 of size for getting txreq + * @sent_txreqs: count of txreqs posted to sdma + * @tail: ring tail + * @stops: count of stops of queue + * @ring_full: ring has been filled + * @no_desc: descriptor shortage seen + * @complete_txreqs: count of txreqs completed by sdma + * @head: ring head */ struct ipoib_txreq; struct hfi1_ipoib_circ_buf { void *items; - u32 head; - u32 tail; u32 max_items; u32 shift; + /* consumer cache line */ + u64 ____cacheline_aligned_in_smp sent_txreqs; + u32 avail; + u32 tail; + atomic_t stops; + atomic_t ring_full; + atomic_t no_desc; + /* producer cache line */ + atomic64_t ____cacheline_aligned_in_smp complete_txreqs; + u32 head; }; /** @@ -66,14 +79,10 @@ struct hfi1_ipoib_circ_buf { * @sde: sdma engine * @tx_list: tx request list * @sent_txreqs: count of txreqs posted to sdma - * @stops: count of stops of queue - * @ring_full: ring has been filled - * @no_desc: descriptor shortage seen * @flow: tracks when list needs to be flushed for a flow change * @q_idx: ipoib Tx queue index * @pkts_sent: indicator packets have been sent from this queue * @wait: iowait structure - * @complete_txreqs: count of txreqs completed by sdma * @napi: pointer to tx napi interface * @tx_ring: ring of ipoib txreqs to be reaped by napi callback */ @@ -82,17 +91,12 @@ struct hfi1_ipoib_txq { struct hfi1_ipoib_dev_priv *priv; struct sdma_engine *sde; struct list_head tx_list; - u64 sent_txreqs; - atomic_t stops; - atomic_t ring_full; - atomic_t no_desc; union hfi1_ipoib_flow flow; u8 q_idx; bool pkts_sent; struct iowait wait; - atomic64_t ____cacheline_aligned_in_smp complete_txreqs; - struct hfi1_ipoib_circ_buf tx_ring; + struct hfi1_ipoib_circ_buf ____cacheline_aligned_in_smp tx_ring; }; struct hfi1_ipoib_dev_priv { @@ -100,13 +104,12 @@ struct hfi1_ipoib_dev_priv { struct net_device *netdev; struct ib_device *device; struct hfi1_ipoib_txq *txqs; + const struct net_device_ops *netdev_ops; + struct rvt_qp *qp; + u32 qkey; u16 pkey; u16 pkey_index; - u32 qkey; u8 port_num; - - const struct net_device_ops *netdev_ops; - struct rvt_qp *qp; }; /* hfi1 ipoib rdma netdev's private data structure */ diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c index 734b91d31939..c3e43dab2eb7 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_tx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -66,21 +66,21 @@ static u32 hfi1_ipoib_txreqs(const u64 sent, const u64 completed) static u64 hfi1_ipoib_used(struct hfi1_ipoib_txq *txq) { - return hfi1_ipoib_txreqs(txq->sent_txreqs, - atomic64_read(&txq->complete_txreqs)); + return hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs, + atomic64_read(&txq->tx_ring.complete_txreqs)); } static void hfi1_ipoib_stop_txq(struct hfi1_ipoib_txq *txq) { trace_hfi1_txq_stop(txq); - if (atomic_inc_return(&txq->stops) == 1) + if (atomic_inc_return(&txq->tx_ring.stops) == 1) netif_stop_subqueue(txq->priv->netdev, txq->q_idx); } static void hfi1_ipoib_wake_txq(struct hfi1_ipoib_txq *txq) { trace_hfi1_txq_wake(txq); - if (atomic_dec_and_test(&txq->stops)) + if (atomic_dec_and_test(&txq->tx_ring.stops)) netif_wake_subqueue(txq->priv->netdev, txq->q_idx); } @@ -98,9 +98,9 @@ static uint hfi1_ipoib_ring_lwat(struct hfi1_ipoib_txq *txq) static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq) { - ++txq->sent_txreqs; + ++txq->tx_ring.sent_txreqs; if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq) && - !atomic_xchg(&txq->ring_full, 1)) { + !atomic_xchg(&txq->tx_ring.ring_full, 1)) { trace_hfi1_txq_full(txq); hfi1_ipoib_stop_txq(txq); } @@ -125,7 +125,7 @@ static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq) * to protect against ring overflow. */ if (hfi1_ipoib_used(txq) < hfi1_ipoib_ring_lwat(txq) && - atomic_xchg(&txq->ring_full, 0)) { + atomic_xchg(&txq->tx_ring.ring_full, 0)) { trace_hfi1_txq_xmit_unstopped(txq); hfi1_ipoib_wake_txq(txq); } @@ -168,6 +168,7 @@ static void hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq) tx_ring->tail = 0; atomic64_set(&txq->complete_txreqs, 0); txq->sent_txreqs = 0; + tx_ring->avail = hfi1_ipoib_ring_hwat(txq); } static int hfi1_ipoib_poll_tx_ring(struct napi_struct *napi, int budget) @@ -190,7 +191,7 @@ static int hfi1_ipoib_poll_tx_ring(struct napi_struct *napi, int budget) head = CIRC_NEXT(head, max_tx); tx = hfi1_txreq_from_idx(tx_ring, head); } - atomic64_add(work_done, &txq->complete_txreqs); + atomic64_add(work_done, &txq->tx_ring.complete_txreqs); /* Finished freeing tx items so store the head value. */ smp_store_release(&tx_ring->head, head); @@ -344,7 +345,7 @@ static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx, ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(txp->dqpn); - ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->sent_txreqs)); + ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->tx_ring.sent_txreqs)); /* Build the deth */ ohdr->u.ud.deth[0] = cpu_to_be32(priv->qkey); @@ -369,16 +370,25 @@ static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev, struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); struct hfi1_ipoib_txq *txq = txp->txq; struct ipoib_txreq *tx; - struct hfi1_ipoib_circ_buf *tx_ring; - u32 tail; + struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring; + u32 tail = tx_ring->tail; int ret; - if (unlikely(hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq))) - /* This shouldn't happen with a stopped queue */ - return ERR_PTR(-ENOMEM); - tx_ring = &txq->tx_ring; - tail = tx_ring->tail; - tx = hfi1_txreq_from_idx(tx_ring, tx_ring->tail); + if (unlikely(!tx_ring->avail)) { + u32 head; + + if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq)) + /* This shouldn't happen with a stopped queue */ + return ERR_PTR(-ENOMEM); + /* See hfi1_ipoib_poll_tx_ring() */ + head = smp_load_acquire(&tx_ring->head); + tx_ring->avail = + min_t(u32, hfi1_ipoib_ring_hwat(txq), + CIRC_CNT(head, tail, tx_ring->max_items)); + } else { + tx_ring->avail--; + } + tx = hfi1_txreq_from_idx(tx_ring, tail); trace_hfi1_txq_alloc_tx(txq); /* so that we can test if the sdma descriptors are there */ @@ -639,7 +649,7 @@ static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde, if (list_empty(&txq->wait.list)) { struct hfi1_ibport *ibp = &sde->ppd->ibport_data; - if (!atomic_xchg(&txq->no_desc, 1)) { + if (!atomic_xchg(&txq->tx_ring.no_desc, 1)) { trace_hfi1_txq_queued(txq); hfi1_ipoib_stop_txq(txq); } @@ -682,7 +692,7 @@ static void hfi1_ipoib_flush_txq(struct work_struct *work) if (likely(dev->reg_state == NETREG_REGISTERED) && likely(!hfi1_ipoib_flush_tx_list(dev, txq))) - if (atomic_xchg(&txq->no_desc, 0)) + if (atomic_xchg(&txq->tx_ring.no_desc, 0)) hfi1_ipoib_wake_txq(txq); } @@ -720,10 +730,10 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) txq->priv = priv; txq->sde = NULL; INIT_LIST_HEAD(&txq->tx_list); - atomic64_set(&txq->complete_txreqs, 0); - atomic_set(&txq->stops, 0); - atomic_set(&txq->ring_full, 0); - atomic_set(&txq->no_desc, 0); + atomic64_set(&txq->tx_ring.complete_txreqs, 0); + atomic_set(&txq->tx_ring.stops, 0); + atomic_set(&txq->tx_ring.ring_full, 0); + atomic_set(&txq->tx_ring.no_desc, 0); txq->q_idx = i; txq->flow.tx_queue = 0xff; txq->flow.sc5 = 0xff; @@ -740,6 +750,7 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) txq->tx_ring.max_items = tx_ring_size; txq->tx_ring.shift = ilog2(tx_ring_size); + txq->tx_ring.avail = hfi1_ipoib_ring_hwat(txq); netif_tx_napi_add(dev, &txq->napi, hfi1_ipoib_poll_tx_ring, @@ -765,7 +776,7 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq) { struct sdma_txreq *txreq; struct sdma_txreq *txreq_tmp; - atomic64_t *complete_txreqs = &txq->complete_txreqs; + atomic64_t *complete_txreqs = &txq->tx_ring.complete_txreqs; list_for_each_entry_safe(txreq, txreq_tmp, &txq->tx_list, list) { struct ipoib_txreq *tx = @@ -782,7 +793,7 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq) dd_dev_warn(txq->priv->dd, "txq %d not empty found %u requests\n", txq->q_idx, - hfi1_ipoib_txreqs(txq->sent_txreqs, + hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs, atomic64_read(complete_txreqs))); } @@ -834,20 +845,20 @@ void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q) { struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); struct hfi1_ipoib_txq *txq = &priv->txqs[q]; - u64 completed = atomic64_read(&txq->complete_txreqs); + u64 completed = atomic64_read(&txq->tx_ring.complete_txreqs); dd_dev_info(priv->dd, "timeout txq %llx q %u stopped %u stops %d no_desc %d ring_full %d\n", (unsigned long long)txq, q, __netif_subqueue_stopped(dev, txq->q_idx), - atomic_read(&txq->stops), - atomic_read(&txq->no_desc), - atomic_read(&txq->ring_full)); + atomic_read(&txq->tx_ring.stops), + atomic_read(&txq->tx_ring.no_desc), + atomic_read(&txq->tx_ring.ring_full)); dd_dev_info(priv->dd, "sde %llx engine %u\n", (unsigned long long)txq->sde, txq->sde ? txq->sde->this_idx : 0); dd_dev_info(priv->dd, "flow %x\n", txq->flow.as_int); dd_dev_info(priv->dd, "sent %llu completed %llu used %llu\n", - txq->sent_txreqs, completed, hfi1_ipoib_used(txq)); + txq->tx_ring.sent_txreqs, completed, hfi1_ipoib_used(txq)); dd_dev_info(priv->dd, "tx_queue_len %u max_items %u\n", dev->tx_queue_len, txq->tx_ring.max_items); dd_dev_info(priv->dd, "head %u tail %u\n", diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h index 7318aa6150b5..c9b1cd0cdc98 100644 --- a/drivers/infiniband/hw/hfi1/trace_tx.h +++ b/drivers/infiniband/hw/hfi1/trace_tx.h @@ -917,11 +917,11 @@ DECLARE_EVENT_CLASS(/* AIP */ __entry->tail = txq->tx_ring.tail; __entry->idx = txq->q_idx; __entry->used = - txq->sent_txreqs - - atomic64_read(&txq->complete_txreqs); + txq->tx_ring.sent_txreqs - + atomic64_read(&txq->tx_ring.complete_txreqs); __entry->flow = txq->flow.as_int; - __entry->stops = atomic_read(&txq->stops); - __entry->no_desc = atomic_read(&txq->no_desc); + __entry->stops = atomic_read(&txq->tx_ring.stops); + __entry->no_desc = atomic_read(&txq->tx_ring.no_desc); __entry->stopped = __netif_subqueue_stopped(txq->priv->netdev, txq->q_idx); ), -- cgit From b4b90a50cbb97f327b9231bd61d6592296093309 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 13 Sep 2021 09:28:47 -0400 Subject: IB/hfi1: Remove atomic completion count The atomic is not needed. Fixes: d99dc602e2a5 ("IB/hfi1: Add functions to transmit datagram ipoib packets") Link: https://lore.kernel.org/r/20210913132847.131370.54250.stgit@awfm-01.cornelisnetworks.com Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/ipoib.h | 2 +- drivers/infiniband/hw/hfi1/ipoib_tx.c | 18 ++++++++---------- drivers/infiniband/hw/hfi1/trace_tx.h | 2 +- 3 files changed, 10 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h index 8d9a03a5368a..eb5c25199ad3 100644 --- a/drivers/infiniband/hw/hfi1/ipoib.h +++ b/drivers/infiniband/hw/hfi1/ipoib.h @@ -69,7 +69,7 @@ struct hfi1_ipoib_circ_buf { atomic_t ring_full; atomic_t no_desc; /* producer cache line */ - atomic64_t ____cacheline_aligned_in_smp complete_txreqs; + u64 ____cacheline_aligned_in_smp complete_txreqs; u32 head; }; diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c index c3e43dab2eb7..1a7a8372d8c8 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_tx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -67,7 +67,7 @@ static u32 hfi1_ipoib_txreqs(const u64 sent, const u64 completed) static u64 hfi1_ipoib_used(struct hfi1_ipoib_txq *txq) { return hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs, - atomic64_read(&txq->tx_ring.complete_txreqs)); + txq->tx_ring.complete_txreqs); } static void hfi1_ipoib_stop_txq(struct hfi1_ipoib_txq *txq) @@ -166,8 +166,8 @@ static void hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq) } tx_ring->head = 0; tx_ring->tail = 0; - atomic64_set(&txq->complete_txreqs, 0); - txq->sent_txreqs = 0; + tx_ring->complete_txreqs = 0; + tx_ring->sent_txreqs = 0; tx_ring->avail = hfi1_ipoib_ring_hwat(txq); } @@ -191,7 +191,7 @@ static int hfi1_ipoib_poll_tx_ring(struct napi_struct *napi, int budget) head = CIRC_NEXT(head, max_tx); tx = hfi1_txreq_from_idx(tx_ring, head); } - atomic64_add(work_done, &txq->tx_ring.complete_txreqs); + tx_ring->complete_txreqs += work_done; /* Finished freeing tx items so store the head value. */ smp_store_release(&tx_ring->head, head); @@ -730,7 +730,6 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) txq->priv = priv; txq->sde = NULL; INIT_LIST_HEAD(&txq->tx_list); - atomic64_set(&txq->tx_ring.complete_txreqs, 0); atomic_set(&txq->tx_ring.stops, 0); atomic_set(&txq->tx_ring.ring_full, 0); atomic_set(&txq->tx_ring.no_desc, 0); @@ -776,7 +775,6 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq) { struct sdma_txreq *txreq; struct sdma_txreq *txreq_tmp; - atomic64_t *complete_txreqs = &txq->tx_ring.complete_txreqs; list_for_each_entry_safe(txreq, txreq_tmp, &txq->tx_list, list) { struct ipoib_txreq *tx = @@ -786,7 +784,7 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq) sdma_txclean(txq->priv->dd, &tx->txreq); dev_kfree_skb_any(tx->skb); tx->skb = NULL; - atomic64_inc(complete_txreqs); + txq->tx_ring.complete_txreqs++; } if (hfi1_ipoib_used(txq)) @@ -794,7 +792,7 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq) "txq %d not empty found %u requests\n", txq->q_idx, hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs, - atomic64_read(complete_txreqs))); + txq->tx_ring.complete_txreqs)); } void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv) @@ -845,7 +843,6 @@ void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q) { struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); struct hfi1_ipoib_txq *txq = &priv->txqs[q]; - u64 completed = atomic64_read(&txq->tx_ring.complete_txreqs); dd_dev_info(priv->dd, "timeout txq %llx q %u stopped %u stops %d no_desc %d ring_full %d\n", (unsigned long long)txq, q, @@ -858,7 +855,8 @@ void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q) txq->sde ? txq->sde->this_idx : 0); dd_dev_info(priv->dd, "flow %x\n", txq->flow.as_int); dd_dev_info(priv->dd, "sent %llu completed %llu used %llu\n", - txq->tx_ring.sent_txreqs, completed, hfi1_ipoib_used(txq)); + txq->tx_ring.sent_txreqs, txq->tx_ring.complete_txreqs, + hfi1_ipoib_used(txq)); dd_dev_info(priv->dd, "tx_queue_len %u max_items %u\n", dev->tx_queue_len, txq->tx_ring.max_items); dd_dev_info(priv->dd, "head %u tail %u\n", diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h index c9b1cd0cdc98..f00696f89652 100644 --- a/drivers/infiniband/hw/hfi1/trace_tx.h +++ b/drivers/infiniband/hw/hfi1/trace_tx.h @@ -918,7 +918,7 @@ DECLARE_EVENT_CLASS(/* AIP */ __entry->idx = txq->q_idx; __entry->used = txq->tx_ring.sent_txreqs - - atomic64_read(&txq->tx_ring.complete_txreqs); + txq->tx_ring.complete_txreqs; __entry->flow = txq->flow.as_int; __entry->stops = atomic_read(&txq->tx_ring.stops); __entry->no_desc = atomic_read(&txq->tx_ring.no_desc); -- cgit From 6d1ebccbd64af1f989c1e8a5976f82282ddd4ee4 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 13 Sep 2021 09:28:52 -0400 Subject: IB/hfi1: Add ring consumer and producers traces These traces are used to debugging ring issues. The ipoib_txreq needed to be moved to a header file to allow access from the trace header file. The trace changes include: - new producer/consumer traces - new allocation deallocation traces - additional fidelity for SDMA engine prints Fixes: 4bd00b55c978 ("IB/hfi1: Add AIP tx traces") Link: https://lore.kernel.org/r/20210913132852.131370.9664.stgit@awfm-01.cornelisnetworks.com Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/ipoib.h | 21 +++++++++++- drivers/infiniband/hw/hfi1/ipoib_tx.c | 23 ++----------- drivers/infiniband/hw/hfi1/trace_tx.h | 63 ++++++++++++++++++++++++++++++++++- 3 files changed, 85 insertions(+), 22 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h index eb5c25199ad3..909122934246 100644 --- a/drivers/infiniband/hw/hfi1/ipoib.h +++ b/drivers/infiniband/hw/hfi1/ipoib.h @@ -43,6 +43,26 @@ union hfi1_ipoib_flow { } __attribute__((__packed__)); }; +/** + * struct ipoib_txreq - IPOIB transmit descriptor + * @txreq: sdma transmit request + * @sdma_hdr: 9b ib headers + * @sdma_status: status returned by sdma engine + * @complete: non-zero implies complete + * @priv: ipoib netdev private data + * @txq: txq on which skb was output + * @skb: skb to send + */ +struct ipoib_txreq { + struct sdma_txreq txreq; + struct hfi1_sdma_header sdma_hdr; + int sdma_status; + int complete; + struct hfi1_ipoib_dev_priv *priv; + struct hfi1_ipoib_txq *txq; + struct sk_buff *skb; +}; + /** * struct hfi1_ipoib_circ_buf - List of items to be processed * @items: ring of items each a power of two size @@ -56,7 +76,6 @@ union hfi1_ipoib_flow { * @complete_txreqs: count of txreqs completed by sdma * @head: ring head */ -struct ipoib_txreq; struct hfi1_ipoib_circ_buf { void *items; u32 max_items; diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c index 1a7a8372d8c8..d1c2cf52ba48 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_tx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -22,26 +22,6 @@ #define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size) #define CIRC_PREV(val, size) CIRC_ADD(val, -1, size) -/** - * struct ipoib_txreq - IPOIB transmit descriptor - * @txreq: sdma transmit request - * @sdma_hdr: 9b ib headers - * @sdma_status: status returned by sdma engine - * @complete: non-zero implies complete - * @priv: ipoib netdev private data - * @txq: txq on which skb was output - * @skb: skb to send - */ -struct ipoib_txreq { - struct sdma_txreq txreq; - struct hfi1_sdma_header sdma_hdr; - int sdma_status; - int complete; - struct hfi1_ipoib_dev_priv *priv; - struct hfi1_ipoib_txq *txq; - struct sk_buff *skb; -}; - struct ipoib_txparms { struct hfi1_devdata *dd; struct rdma_ah_attr *ah_attr; @@ -187,6 +167,7 @@ static int hfi1_ipoib_poll_tx_ring(struct napi_struct *napi, int budget) if (!smp_load_acquire(&tx->complete)) break; tx->complete = 0; + trace_hfi1_tx_produce(tx, head); hfi1_ipoib_free_tx(tx, budget); head = CIRC_NEXT(head, max_tx); tx = hfi1_txreq_from_idx(tx_ring, head); @@ -495,6 +476,7 @@ static int hfi1_ipoib_send_dma_single(struct net_device *dev, } tx_ring = &txq->tx_ring; + trace_hfi1_tx_consume(tx, tx_ring->tail); /* consume tx */ smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items)); ret = hfi1_ipoib_submit_tx(txq, tx); @@ -557,6 +539,7 @@ static int hfi1_ipoib_send_dma_list(struct net_device *dev, } tx_ring = &txq->tx_ring; + trace_hfi1_tx_consume(tx, tx_ring->tail); /* consume tx */ smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items)); list_add_tail(&tx->txreq.list, &txq->tx_list); diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h index f00696f89652..ed1b9e1e4b17 100644 --- a/drivers/infiniband/hw/hfi1/trace_tx.h +++ b/drivers/infiniband/hw/hfi1/trace_tx.h @@ -926,11 +926,13 @@ DECLARE_EVENT_CLASS(/* AIP */ __netif_subqueue_stopped(txq->priv->netdev, txq->q_idx); ), TP_printk(/* print */ - "[%s] txq %llx idx %u sde %llx head %lx tail %lx flow %x used %u stops %d no_desc %d stopped %u", + "[%s] txq %llx idx %u sde %llx:%u cpu %d head %lx tail %lx flow %x used %u stops %d no_desc %d stopped %u", __get_str(dev), (unsigned long long)__entry->txq, __entry->idx, (unsigned long long)__entry->sde, + __entry->sde ? __entry->sde->this_idx : 0, + __entry->sde ? __entry->sde->cpu : 0, __entry->head, __entry->tail, __entry->flow, @@ -995,6 +997,65 @@ DEFINE_EVENT(/* xmit_unstopped */ TP_ARGS(txq) ); +DECLARE_EVENT_CLASS(/* AIP */ + hfi1_ipoib_tx_template, + TP_PROTO(struct ipoib_txreq *tx, u32 idx), + TP_ARGS(tx, idx), + TP_STRUCT__entry(/* entry */ + DD_DEV_ENTRY(tx->txq->priv->dd) + __field(struct ipoib_txreq *, tx) + __field(struct hfi1_ipoib_txq *, txq) + __field(struct sk_buff *, skb) + __field(ulong, idx) + ), + TP_fast_assign(/* assign */ + DD_DEV_ASSIGN(tx->txq->priv->dd); + __entry->tx = tx; + __entry->skb = tx->skb; + __entry->txq = tx->txq; + __entry->idx = idx; + ), + TP_printk(/* print */ + "[%s] tx %llx txq %llx,%u skb %llx idx %lu", + __get_str(dev), + (unsigned long long)__entry->tx, + (unsigned long long)__entry->txq, + __entry->txq ? __entry->txq->q_idx : 0, + (unsigned long long)__entry->skb, + __entry->idx + ) +); + +DEFINE_EVENT(/* produce */ + hfi1_ipoib_tx_template, hfi1_tx_produce, + TP_PROTO(struct ipoib_txreq *tx, u32 idx), + TP_ARGS(tx, idx) +); + +DEFINE_EVENT(/* consume */ + hfi1_ipoib_tx_template, hfi1_tx_consume, + TP_PROTO(struct ipoib_txreq *tx, u32 idx), + TP_ARGS(tx, idx) +); + +DEFINE_EVENT(/* alloc_tx */ + hfi1_ipoib_txq_template, hfi1_txq_alloc_tx, + TP_PROTO(struct hfi1_ipoib_txq *txq), + TP_ARGS(txq) +); + +DEFINE_EVENT(/* poll */ + hfi1_ipoib_txq_template, hfi1_txq_poll, + TP_PROTO(struct hfi1_ipoib_txq *txq), + TP_ARGS(txq) +); + +DEFINE_EVENT(/* complete */ + hfi1_ipoib_txq_template, hfi1_txq_complete, + TP_PROTO(struct hfi1_ipoib_txq *txq), + TP_ARGS(txq) +); + #endif /* __HFI1_TRACE_TX_H */ #undef TRACE_INCLUDE_PATH -- cgit From 11333be19c08b9277a87eb8c163217b8c362e91e Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 27 Sep 2021 17:53:33 -0500 Subject: RDMA/hfi1: Use struct_size() and flex_array_size() helpers Make use of the struct_size() and flex_array_size() helpers instead of open-coded versions, in order to avoid any potential type mistakes or integer overflows that, in the worse scenario, could lead to heap overflows. Link: https://lore.kernel.org/r/20210927225333.GA192634@embeddedor Link: https://github.com/KSPP/linux/issues/160 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 0c86e9d354f8..186d30291260 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -692,8 +692,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, * Allocate the node first so we can handle a potential * failure before we've programmed anything. */ - node = kzalloc(sizeof(*node) + (sizeof(struct page *) * npages), - GFP_KERNEL); + node = kzalloc(struct_size(node, pages, npages), GFP_KERNEL); if (!node) return -ENOMEM; @@ -713,7 +712,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, node->dma_addr = phys; node->grp = grp; node->freed = false; - memcpy(node->pages, pages, sizeof(struct page *) * npages); + memcpy(node->pages, pages, flex_array_size(node, pages, npages)); if (fd->use_mn) { ret = mmu_interval_notifier_insert( -- cgit From 373efe0f3095ef60a31876376c33c932f4721823 Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Tue, 14 Sep 2021 16:02:51 +0800 Subject: RDMA/rxe: Add new RXE_READ_OR_WRITE_MASK 1) Replace (RXE_READ_MASK | RXE_WRITE_MASK) with RXE_READ_OR_WRITE_MASK. 2) Change (RXE_READ_MASK | RXE_WRITE_OR_SEND) to RXE_READ_OR_WRITE_MASK because we don't need to check RETH for RXE_SEND_MASK. Link: https://lore.kernel.org/r/20210914080253.1145353-2-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_opcode.h | 1 + drivers/infiniband/sw/rxe/rxe_resp.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h index e02f039b8c44..bbeccb1dcec7 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.h +++ b/drivers/infiniband/sw/rxe/rxe_opcode.h @@ -84,6 +84,7 @@ enum rxe_hdr_mask { RXE_READ_OR_ATOMIC = (RXE_READ_MASK | RXE_ATOMIC_MASK), RXE_WRITE_OR_SEND = (RXE_WRITE_MASK | RXE_SEND_MASK), + RXE_READ_OR_WRITE_MASK = (RXE_READ_MASK | RXE_WRITE_MASK), }; #define OPCODE_NONE (-1) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index c5a0b1f5747a..ea7d3ee016d9 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -413,7 +413,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp, enum resp_states state; int access; - if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) { + if (pkt->mask & RXE_READ_OR_WRITE_MASK) { if (pkt->mask & RXE_RETH_MASK) { qp->resp.va = reth_va(pkt); qp->resp.offset = 0; @@ -434,7 +434,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp, } /* A zero-byte op is not required to set an addr or rkey. */ - if ((pkt->mask & (RXE_READ_MASK | RXE_WRITE_OR_SEND)) && + if ((pkt->mask & RXE_READ_OR_WRITE_MASK) && (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) { return RESPST_EXECUTE; -- cgit From 45216d63630a4ce80679c336685d948ec7478bb6 Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Tue, 14 Sep 2021 16:02:52 +0800 Subject: RDMA/rxe: Add MASK suffix for RXE_READ_OR_ATOMIC and RXE_WRITE_OR_SEND To reflect the intention, since it is not just a single bit. Link: https://lore.kernel.org/r/20210914080253.1145353-3-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_opcode.h | 4 ++-- drivers/infiniband/sw/rxe/rxe_req.c | 6 +++--- drivers/infiniband/sw/rxe/rxe_resp.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h index bbeccb1dcec7..e3a46b287c15 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.h +++ b/drivers/infiniband/sw/rxe/rxe_opcode.h @@ -82,8 +82,8 @@ enum rxe_hdr_mask { RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12), - RXE_READ_OR_ATOMIC = (RXE_READ_MASK | RXE_ATOMIC_MASK), - RXE_WRITE_OR_SEND = (RXE_WRITE_MASK | RXE_SEND_MASK), + RXE_READ_OR_ATOMIC_MASK = (RXE_READ_MASK | RXE_ATOMIC_MASK), + RXE_WRITE_OR_SEND_MASK = (RXE_WRITE_MASK | RXE_SEND_MASK), RXE_READ_OR_WRITE_MASK = (RXE_READ_MASK | RXE_WRITE_MASK), }; diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 2981b3ef3cc0..fe275fcaffbd 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -461,7 +461,7 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (err) return err; - if (pkt->mask & RXE_WRITE_OR_SEND) { + if (pkt->mask & RXE_WRITE_OR_SEND_MASK) { if (wqe->wr.send_flags & IB_SEND_INLINE) { u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset]; @@ -678,13 +678,13 @@ next_wqe: } mask = rxe_opcode[opcode].mask; - if (unlikely(mask & RXE_READ_OR_ATOMIC)) { + if (unlikely(mask & RXE_READ_OR_ATOMIC_MASK)) { if (check_init_depth(qp, wqe)) goto exit; } mtu = get_mtu(qp); - payload = (mask & RXE_WRITE_OR_SEND) ? wqe->dma.resid : 0; + payload = (mask & RXE_WRITE_OR_SEND_MASK) ? wqe->dma.resid : 0; if (payload > mtu) { if (qp_type(qp) == IB_QPT_UD) { /* C10-93.1.1: If the total sum of all the buffer lengths specified for a diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index ea7d3ee016d9..4af0dc95784a 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -362,7 +362,7 @@ static enum resp_states check_resource(struct rxe_qp *qp, } } - if (pkt->mask & RXE_READ_OR_ATOMIC) { + if (pkt->mask & RXE_READ_OR_ATOMIC_MASK) { /* it is the requesters job to not send * too many read/atomic ops, we just * recycle the responder resource queue -- cgit From 27da60547de16a877fb192b90e4841e52fbe8fcd Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Tue, 14 Sep 2021 16:02:53 +0800 Subject: RDMA/rxe: Remove unused WR_READ_WRITE_OR_SEND_MASK Link: https://lore.kernel.org/r/20210914080253.1145353-4-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_opcode.h | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h index e3a46b287c15..8f9aaaf260f2 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.h +++ b/drivers/infiniband/sw/rxe/rxe_opcode.h @@ -22,7 +22,6 @@ enum rxe_wr_mask { WR_LOCAL_OP_MASK = BIT(5), WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK, - WR_READ_WRITE_OR_SEND_MASK = WR_READ_OR_WRITE_MASK | WR_SEND_MASK, WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK, WR_ATOMIC_OR_READ_MASK = WR_ATOMIC_MASK | WR_READ_MASK, }; -- cgit From 49b99314b49ef95b5a6f3bd9f0fc8912cfd10a9f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 3 Sep 2021 16:48:15 +0800 Subject: IB/mlx5: Flow through a more detailed return code from get_prefetchable_mr() The error returns for various cases detected by get_prefetchable_mr() get confused as it flows back to userspace. Properly label each error path and flow the error code properly back to the system call. Link: https://lore.kernel.org/r/20210928170846.GA1721590@nvidia.com Suggested-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index d0d98e584ebc..77890a85fc2d 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1708,20 +1708,26 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, xa_lock(&dev->odp_mkeys); mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey)); - if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR) + if (!mmkey || mmkey->key != lkey) { + mr = ERR_PTR(-ENOENT); goto end; + } + if (mmkey->type != MLX5_MKEY_MR) { + mr = ERR_PTR(-EINVAL); + goto end; + } mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); if (mr->ibmr.pd != pd) { - mr = NULL; + mr = ERR_PTR(-EPERM); goto end; } /* prefetch with write-access must be supported by the MR */ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && !mr->umem->writable) { - mr = NULL; + mr = ERR_PTR(-EPERM); goto end; } @@ -1753,7 +1759,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) destroy_prefetch_work(work); } -static bool init_prefetch_work(struct ib_pd *pd, +static int init_prefetch_work(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 pf_flags, struct prefetch_mr_work *work, struct ib_sge *sg_list, u32 num_sge) @@ -1764,17 +1770,19 @@ static bool init_prefetch_work(struct ib_pd *pd, work->pf_flags = pf_flags; for (i = 0; i < num_sge; ++i) { - work->frags[i].io_virt = sg_list[i].addr; - work->frags[i].length = sg_list[i].length; - work->frags[i].mr = - get_prefetchable_mr(pd, advice, sg_list[i].lkey); - if (!work->frags[i].mr) { + struct mlx5_ib_mr *mr; + + mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey); + if (IS_ERR(mr)) { work->num_sge = i; - return false; + return PTR_ERR(mr); } + work->frags[i].io_virt = sg_list[i].addr; + work->frags[i].length = sg_list[i].length; + work->frags[i].mr = mr; } work->num_sge = num_sge; - return true; + return 0; } static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, @@ -1790,8 +1798,8 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, struct mlx5_ib_mr *mr; mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey); - if (!mr) - return -ENOENT; + if (IS_ERR(mr)) + return PTR_ERR(mr); ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length, &bytes_mapped, pf_flags); if (ret < 0) { @@ -1811,6 +1819,7 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, { u32 pf_flags = 0; struct prefetch_mr_work *work; + int rc; if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; @@ -1826,9 +1835,10 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, if (!work) return -ENOMEM; - if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) { + rc = init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge); + if (rc) { destroy_prefetch_work(work); - return -EINVAL; + return rc; } queue_work(system_unbound_wq, &work->work); return 0; -- cgit From 0994a1bcd5f7c0bf7ca3b4938d4f0f6928ac7886 Mon Sep 17 00:00:00 2001 From: Rao Shoaib Date: Tue, 14 Sep 2021 18:12:20 -0700 Subject: RDMA/rxe: Bump up default maximum values used via uverbs In our internal testing we have found that default maximum values are too small. Ideally there should be no limits, but since maximum values are reported via ibv_query_device, we have to return some value. So, the default maximums have been changed to large values. Link: https://lore.kernel.org/r/20210915011220.307585-1-Rao.Shoaib@oracle.com Signed-off-by: Rao Shoaib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_param.h | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index b5a70cbe94aa..aeffaef8f381 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -9,6 +9,8 @@ #include +#define DEFAULT_MAX_VALUE (1 << 20) + static inline enum ib_mtu rxe_mtu_int_to_enum(int mtu) { if (mtu < 256) @@ -37,7 +39,7 @@ static inline enum ib_mtu eth_mtu_int_to_enum(int mtu) enum rxe_device_param { RXE_MAX_MR_SIZE = -1ull, RXE_PAGE_SIZE_CAP = 0xfffff000, - RXE_MAX_QP_WR = 0x4000, + RXE_MAX_QP_WR = DEFAULT_MAX_VALUE, RXE_DEVICE_CAP_FLAGS = IB_DEVICE_BAD_PKEY_CNTR | IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_AUTO_PATH_MIG @@ -58,42 +60,42 @@ enum rxe_device_param { RXE_MAX_INLINE_DATA = RXE_MAX_WQE_SIZE - sizeof(struct rxe_send_wqe), RXE_MAX_SGE_RD = 32, - RXE_MAX_CQ = 16384, + RXE_MAX_CQ = DEFAULT_MAX_VALUE, RXE_MAX_LOG_CQE = 15, - RXE_MAX_PD = 0x7ffc, + RXE_MAX_PD = DEFAULT_MAX_VALUE, RXE_MAX_QP_RD_ATOM = 128, RXE_MAX_RES_RD_ATOM = 0x3f000, RXE_MAX_QP_INIT_RD_ATOM = 128, RXE_MAX_MCAST_GRP = 8192, RXE_MAX_MCAST_QP_ATTACH = 56, RXE_MAX_TOT_MCAST_QP_ATTACH = 0x70000, - RXE_MAX_AH = 100, - RXE_MAX_SRQ_WR = 0x4000, + RXE_MAX_AH = DEFAULT_MAX_VALUE, + RXE_MAX_SRQ_WR = DEFAULT_MAX_VALUE, RXE_MIN_SRQ_WR = 1, RXE_MAX_SRQ_SGE = 27, RXE_MIN_SRQ_SGE = 1, RXE_MAX_FMR_PAGE_LIST_LEN = 512, - RXE_MAX_PKEYS = 1, + RXE_MAX_PKEYS = 64, RXE_LOCAL_CA_ACK_DELAY = 15, - RXE_MAX_UCONTEXT = 512, + RXE_MAX_UCONTEXT = DEFAULT_MAX_VALUE, RXE_NUM_PORT = 1, - RXE_MAX_QP = 0x10000, RXE_MIN_QP_INDEX = 16, - RXE_MAX_QP_INDEX = 0x00020000, + RXE_MAX_QP_INDEX = DEFAULT_MAX_VALUE, + RXE_MAX_QP = DEFAULT_MAX_VALUE - RXE_MIN_QP_INDEX, - RXE_MAX_SRQ = 0x00001000, RXE_MIN_SRQ_INDEX = 0x00020001, - RXE_MAX_SRQ_INDEX = 0x00040000, + RXE_MAX_SRQ_INDEX = DEFAULT_MAX_VALUE, + RXE_MAX_SRQ = DEFAULT_MAX_VALUE - RXE_MIN_SRQ_INDEX, - RXE_MAX_MR = 0x00001000, - RXE_MAX_MW = 0x00001000, RXE_MIN_MR_INDEX = 0x00000001, - RXE_MAX_MR_INDEX = 0x00010000, + RXE_MAX_MR_INDEX = DEFAULT_MAX_VALUE, + RXE_MAX_MR = DEFAULT_MAX_VALUE - RXE_MIN_MR_INDEX, RXE_MIN_MW_INDEX = 0x00010001, RXE_MAX_MW_INDEX = 0x00020000, + RXE_MAX_MW = 0x00001000, RXE_MAX_PKT_PER_ACK = 64, -- cgit From 99cfddb8a8bd57122effa808653dec83408705a6 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 15 Sep 2021 13:25:19 -0300 Subject: RDMA/cma: Split apart the multiple uses of the same list heads Two list heads in the rdma_id_private are being used for multiple purposes, to save a few bytes of memory. Give the different purposes different names and union the memory that is clearly exclusive. list splits into device_item and listen_any_item. device_item is threaded onto the cma_device's list and listen_any goes onto the listen_any_list. IDs doing any listen cannot have devices. listen_list splits into listen_item and listen_list. listen_list is on the parent listen any rdma_id_private and listen_item is on child listen that is bound to a specific cma_dev. Which name should be used in which case depends on the state and other factors of the rdma_id_private. Remap all the confusing references to make sense with the new names, so at least there is some hope of matching the necessary preconditions with each access. Link: https://lore.kernel.org/r/0-v1-a5ead4a0c19d+c3a-cma_list_head_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 34 ++++++++++++++++++---------------- drivers/infiniband/core/cma_priv.h | 11 +++++++++-- 2 files changed, 27 insertions(+), 18 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 704ce595542c..835ac54d4a24 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -453,7 +453,7 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv, id_priv->id.device = cma_dev->device; id_priv->id.route.addr.dev_addr.transport = rdma_node_get_transport(cma_dev->device->node_type); - list_add_tail(&id_priv->list, &cma_dev->id_list); + list_add_tail(&id_priv->device_item, &cma_dev->id_list); trace_cm_id_attach(id_priv, cma_dev->device); } @@ -470,7 +470,7 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv, static void cma_release_dev(struct rdma_id_private *id_priv) { mutex_lock(&lock); - list_del(&id_priv->list); + list_del_init(&id_priv->device_item); cma_dev_put(id_priv->cma_dev); id_priv->cma_dev = NULL; id_priv->id.device = NULL; @@ -854,6 +854,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler, init_completion(&id_priv->comp); refcount_set(&id_priv->refcount, 1); mutex_init(&id_priv->handler_mutex); + INIT_LIST_HEAD(&id_priv->device_item); INIT_LIST_HEAD(&id_priv->listen_list); INIT_LIST_HEAD(&id_priv->mc_list); get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); @@ -1647,7 +1648,7 @@ static struct rdma_id_private *cma_find_listener( return id_priv; list_for_each_entry(id_priv_dev, &id_priv->listen_list, - listen_list) { + listen_item) { if (id_priv_dev->id.device == cm_id->device && cma_match_net_dev(&id_priv_dev->id, net_dev, req)) @@ -1756,14 +1757,15 @@ static void _cma_cancel_listens(struct rdma_id_private *id_priv) * Remove from listen_any_list to prevent added devices from spawning * additional listen requests. */ - list_del(&id_priv->list); + list_del_init(&id_priv->listen_any_item); while (!list_empty(&id_priv->listen_list)) { - dev_id_priv = list_entry(id_priv->listen_list.next, - struct rdma_id_private, listen_list); + dev_id_priv = + list_first_entry(&id_priv->listen_list, + struct rdma_id_private, listen_item); /* sync with device removal to avoid duplicate destruction */ - list_del_init(&dev_id_priv->list); - list_del(&dev_id_priv->listen_list); + list_del_init(&dev_id_priv->device_item); + list_del_init(&dev_id_priv->listen_item); mutex_unlock(&lock); rdma_destroy_id(&dev_id_priv->id); @@ -2564,7 +2566,7 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv, ret = rdma_listen(&dev_id_priv->id, id_priv->backlog); if (ret) goto err_listen; - list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); + list_add_tail(&dev_id_priv->listen_item, &id_priv->listen_list); return 0; err_listen: /* Caller must destroy this after releasing lock */ @@ -2580,13 +2582,13 @@ static int cma_listen_on_all(struct rdma_id_private *id_priv) int ret; mutex_lock(&lock); - list_add_tail(&id_priv->list, &listen_any_list); + list_add_tail(&id_priv->listen_any_item, &listen_any_list); list_for_each_entry(cma_dev, &dev_list, list) { ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); if (ret) { /* Prevent racing with cma_process_remove() */ if (to_destroy) - list_del_init(&to_destroy->list); + list_del_init(&to_destroy->device_item); goto err_listen; } } @@ -4895,7 +4897,7 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event, mutex_lock(&lock); list_for_each_entry(cma_dev, &dev_list, list) - list_for_each_entry(id_priv, &cma_dev->id_list, list) { + list_for_each_entry(id_priv, &cma_dev->id_list, device_item) { ret = cma_netdev_change(ndev, id_priv); if (ret) goto out; @@ -4955,10 +4957,10 @@ static void cma_process_remove(struct cma_device *cma_dev) mutex_lock(&lock); while (!list_empty(&cma_dev->id_list)) { struct rdma_id_private *id_priv = list_first_entry( - &cma_dev->id_list, struct rdma_id_private, list); + &cma_dev->id_list, struct rdma_id_private, device_item); - list_del(&id_priv->listen_list); - list_del_init(&id_priv->list); + list_del_init(&id_priv->listen_item); + list_del_init(&id_priv->device_item); cma_id_get(id_priv); mutex_unlock(&lock); @@ -5035,7 +5037,7 @@ static int cma_add_one(struct ib_device *device) mutex_lock(&lock); list_add_tail(&cma_dev->list, &dev_list); - list_for_each_entry(id_priv, &listen_any_list, list) { + list_for_each_entry(id_priv, &listen_any_list, listen_any_item) { ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); if (ret) goto free_listen; diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h index f92f101ea981..757a0ef79872 100644 --- a/drivers/infiniband/core/cma_priv.h +++ b/drivers/infiniband/core/cma_priv.h @@ -55,8 +55,15 @@ struct rdma_id_private { struct rdma_bind_list *bind_list; struct hlist_node node; - struct list_head list; /* listen_any_list or cma_device.list */ - struct list_head listen_list; /* per device listens */ + union { + struct list_head device_item; /* On cma_device->id_list */ + struct list_head listen_any_item; /* On listen_any_list */ + }; + union { + /* On rdma_id_private->listen_list */ + struct list_head listen_item; + struct list_head listen_list; + }; struct cma_device *cma_dev; struct list_head mc_list; -- cgit From 2f232912feec57d73b22cb26dcdf687846917e9e Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Wed, 22 Sep 2021 14:53:27 +0200 Subject: RDMA/rtrs: Use sysfs_emit instead of s*printf function for sysfs show sysfs_emit function was added to be aware of the PAGE_SIZE maximum of the temporary buffer used for outputting sysfs content, so there is no possible overruns. So replace the uses of any s*printf functions for the sysfs show functions with sysfs_emit. Link: https://lore.kernel.org/r/20210922125333.351454-2-haris.iqbal@ionos.com Signed-off-by: Md Haris Iqbal Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c | 24 +++++++++++------------- drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 2 +- 2 files changed, 12 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c index 5e780bdd763d..9c27f21ec040 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c @@ -45,24 +45,23 @@ int rtrs_clt_stats_migration_cnt_to_str(struct rtrs_clt_stats *stats, size_t used; int cpu; - used = scnprintf(buf, len, " "); + used = sysfs_emit(buf, " "); for_each_possible_cpu(cpu) - used += scnprintf(buf + used, len - used, " CPU%u", cpu); + used += sysfs_emit_at(buf, used, " CPU%u", cpu); - used += scnprintf(buf + used, len - used, "\nfrom:"); + used += sysfs_emit_at(buf, used, "\nfrom:"); for_each_possible_cpu(cpu) { s = per_cpu_ptr(stats->pcpu_stats, cpu); - used += scnprintf(buf + used, len - used, " %d", + used += sysfs_emit_at(buf, used, " %d", atomic_read(&s->cpu_migr.from)); } - used += scnprintf(buf + used, len - used, "\nto :"); + used += sysfs_emit_at(buf, used, "\nto :"); for_each_possible_cpu(cpu) { s = per_cpu_ptr(stats->pcpu_stats, cpu); - used += scnprintf(buf + used, len - used, " %d", - s->cpu_migr.to); + used += sysfs_emit_at(buf, used, " %d", s->cpu_migr.to); } - used += scnprintf(buf + used, len - used, "\n"); + used += sysfs_emit_at(buf, used, "\n"); return used; } @@ -70,9 +69,8 @@ int rtrs_clt_stats_migration_cnt_to_str(struct rtrs_clt_stats *stats, int rtrs_clt_stats_reconnects_to_str(struct rtrs_clt_stats *stats, char *buf, size_t len) { - return scnprintf(buf, len, "%d %d\n", - stats->reconnects.successful_cnt, - stats->reconnects.fail_cnt); + return sysfs_emit(buf, "%d %d\n", stats->reconnects.successful_cnt, + stats->reconnects.fail_cnt); } ssize_t rtrs_clt_stats_rdma_to_str(struct rtrs_clt_stats *stats, @@ -94,7 +92,7 @@ ssize_t rtrs_clt_stats_rdma_to_str(struct rtrs_clt_stats *stats, sum.failover_cnt += r->failover_cnt; } - return scnprintf(page, len, "%llu %llu %llu %llu %u %llu\n", + return sysfs_emit(page, "%llu %llu %llu %llu %u %llu\n", sum.dir[READ].cnt, sum.dir[READ].size_total, sum.dir[WRITE].cnt, sum.dir[WRITE].size_total, atomic_read(&stats->inflight), sum.failover_cnt); @@ -103,7 +101,7 @@ ssize_t rtrs_clt_stats_rdma_to_str(struct rtrs_clt_stats *stats, ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *s, char *page, size_t len) { - return scnprintf(page, len, "echo 1 to reset all statistics\n"); + return sysfs_emit(page, "echo 1 to reset all statistics\n"); } int rtrs_clt_reset_rdma_stats(struct rtrs_clt_stats *stats, bool enable) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index 20efd44297fb..9c43ce5ba1c1 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -102,7 +102,7 @@ static ssize_t rtrs_srv_src_addr_show(struct kobject *kobj, sess = container_of(kobj, struct rtrs_srv_sess, kobj); cnt = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, page, PAGE_SIZE); - return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); + return cnt + sysfs_emit_at(page, cnt, "\n"); } static struct kobj_attribute rtrs_srv_src_addr_attr = -- cgit From 80ad07f7e2bfab79e6209c7b8ce4665ffd9f25ed Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Wed, 22 Sep 2021 14:53:28 +0200 Subject: RDMA/rtrs: Remove len parameter from helper print functions of sysfs Since we have changed all sysfs show functions to use sysfs_emit, we do not require the len (PAGE_SIZE) in our helper print functions. So remove it from the function parameter. Link: https://lore.kernel.org/r/20210922125333.351454-3-haris.iqbal@ionos.com Signed-off-by: Md Haris Iqbal Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c | 12 ++++-------- drivers/infiniband/ulp/rtrs/rtrs-clt.h | 12 +++++------- drivers/infiniband/ulp/rtrs/rtrs-pri.h | 2 +- drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c | 3 +-- drivers/infiniband/ulp/rtrs/rtrs-srv.h | 3 +-- 5 files changed, 12 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c index 9c27f21ec040..61d5e0018392 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c @@ -37,8 +37,7 @@ void rtrs_clt_inc_failover_cnt(struct rtrs_clt_stats *stats) s->rdma.failover_cnt++; } -int rtrs_clt_stats_migration_cnt_to_str(struct rtrs_clt_stats *stats, - char *buf, size_t len) +int rtrs_clt_stats_migration_cnt_to_str(struct rtrs_clt_stats *stats, char *buf) { struct rtrs_clt_stats_pcpu *s; @@ -66,15 +65,13 @@ int rtrs_clt_stats_migration_cnt_to_str(struct rtrs_clt_stats *stats, return used; } -int rtrs_clt_stats_reconnects_to_str(struct rtrs_clt_stats *stats, char *buf, - size_t len) +int rtrs_clt_stats_reconnects_to_str(struct rtrs_clt_stats *stats, char *buf) { return sysfs_emit(buf, "%d %d\n", stats->reconnects.successful_cnt, stats->reconnects.fail_cnt); } -ssize_t rtrs_clt_stats_rdma_to_str(struct rtrs_clt_stats *stats, - char *page, size_t len) +ssize_t rtrs_clt_stats_rdma_to_str(struct rtrs_clt_stats *stats, char *page) { struct rtrs_clt_stats_rdma sum; struct rtrs_clt_stats_rdma *r; @@ -98,8 +95,7 @@ ssize_t rtrs_clt_stats_rdma_to_str(struct rtrs_clt_stats *stats, atomic_read(&stats->inflight), sum.failover_cnt); } -ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *s, - char *page, size_t len) +ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *s, char *page) { return sysfs_emit(page, "echo 1 to reset all statistics\n"); } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h index 9dc819885ec7..6d81aae53df4 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -224,19 +224,17 @@ void rtrs_clt_update_all_stats(struct rtrs_clt_io_req *req, int dir); int rtrs_clt_reset_rdma_lat_distr_stats(struct rtrs_clt_stats *stats, bool enable); ssize_t rtrs_clt_stats_rdma_lat_distr_to_str(struct rtrs_clt_stats *stats, - char *page, size_t len); + char *page); int rtrs_clt_reset_cpu_migr_stats(struct rtrs_clt_stats *stats, bool enable); -int rtrs_clt_stats_migration_cnt_to_str(struct rtrs_clt_stats *stats, char *buf, - size_t len); +int rtrs_clt_stats_migration_cnt_to_str(struct rtrs_clt_stats *stats, char *buf); int rtrs_clt_reset_reconnects_stat(struct rtrs_clt_stats *stats, bool enable); -int rtrs_clt_stats_reconnects_to_str(struct rtrs_clt_stats *stats, char *buf, - size_t len); +int rtrs_clt_stats_reconnects_to_str(struct rtrs_clt_stats *stats, char *buf); int rtrs_clt_reset_rdma_stats(struct rtrs_clt_stats *stats, bool enable); ssize_t rtrs_clt_stats_rdma_to_str(struct rtrs_clt_stats *stats, - char *page, size_t len); + char *page); int rtrs_clt_reset_all_stats(struct rtrs_clt_stats *stats, bool enable); ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *stats, - char *page, size_t len); + char *page); /* rtrs-clt-sysfs.c */ diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index d12ddfa50747..78eac9a4f703 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -398,7 +398,7 @@ static ssize_t get_value##_show(struct kobject *kobj, \ { \ type *stats = container_of(kobj, type, kobj_stats); \ \ - return print(stats, page, PAGE_SIZE); \ + return print(stats, page); \ } #define STAT_ATTR(type, stat, print, reset) \ diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c index 12c374b5eb6e..44b1c1652131 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c @@ -23,8 +23,7 @@ int rtrs_srv_reset_rdma_stats(struct rtrs_srv_stats *stats, bool enable) return -EINVAL; } -ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, - char *page, size_t len) +ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, char *page) { struct rtrs_srv_stats_rdma_stats *r = &stats->rdma_stats; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h index 9d8d2a91a235..7d403c12faf3 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h @@ -136,8 +136,7 @@ static inline void rtrs_srv_update_rdma_stats(struct rtrs_srv_stats *s, /* functions which are implemented in rtrs-srv-stats.c */ int rtrs_srv_reset_rdma_stats(struct rtrs_srv_stats *stats, bool enable); -ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, - char *page, size_t len); +ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, char *page); int rtrs_srv_reset_all_stats(struct rtrs_srv_stats *stats, bool enable); ssize_t rtrs_srv_reset_all_help(struct rtrs_srv_stats *stats, char *page, size_t len); -- cgit From 4b6afe9bc955bee44c0527005c3fb0edac91ac30 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Wed, 22 Sep 2021 14:53:29 +0200 Subject: RDMA/rtrs: Fix warning when use poll mode on client side. When testing with poll mode, it will fail and lead to warning below on client side: $ echo "sessname=bla path=gid:fe80::2:c903:4e:d0b3@gid:fe80::2:c903:8:ca17 device_path=/dev/nullb2 nr_poll_queues=-1" | \ sudo tee /sys/devices/virtual/rnbd-client/ctl/map_device rnbd_client L597: Mapping device /dev/nullb2 on session bla, (access_mode: rw, nr_poll_queues: 8) WARNING: CPU: 3 PID: 9886 at drivers/infiniband/core/cq.c:447 ib_cq_pool_get+0x26f/0x2a0 [ib_core] The problem is in case of poll queue, we need to still call ib_alloc_cq/ib_free_cq, we can't use cq_poll api for poll queue. As both client and server use shared function from rtrs, set irq_con_num to con_num on server side, which is number of total connection of the session, this way we can differ if the rtrs_con requires pollqueue. Following up patches will replace the duplicate code with helpers. Link: https://lore.kernel.org/r/20210922125333.351454-4-haris.iqbal@ionos.com Signed-off-by: Jack Wang Reviewed-by: Gioh Kim Signed-off-by: Md Haris Iqbal Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 1 + drivers/infiniband/ulp/rtrs/rtrs.c | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 716ef7b23558..078a1cbac90c 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1766,6 +1766,7 @@ static struct rtrs_srv_sess *__alloc_sess(struct rtrs_srv *srv, strscpy(sess->s.sessname, str, sizeof(sess->s.sessname)); sess->s.con_num = con_num; + sess->s.irq_con_num = con_num; sess->s.recon_cnt = recon_cnt; uuid_copy(&sess->s.uuid, uuid); spin_lock_init(&sess->state_lock); diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index ca542e477d38..9bc323490ce3 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -228,7 +228,12 @@ static int create_cq(struct rtrs_con *con, int cq_vector, int nr_cqe, struct rdma_cm_id *cm_id = con->cm_id; struct ib_cq *cq; - cq = ib_cq_pool_get(cm_id->device, nr_cqe, cq_vector, poll_ctx); + if (con->cid >= con->sess->irq_con_num) + cq = ib_alloc_cq(cm_id->device, con, nr_cqe, cq_vector, + poll_ctx); + else + cq = ib_cq_pool_get(cm_id->device, nr_cqe, cq_vector, poll_ctx); + if (IS_ERR(cq)) { rtrs_err(con->sess, "Creating completion queue failed, errno: %ld\n", PTR_ERR(cq)); @@ -283,7 +288,10 @@ int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, err = create_qp(con, sess->dev->ib_pd, max_send_wr, max_recv_wr, max_send_sge); if (err) { - ib_cq_pool_put(con->cq, con->nr_cqe); + if (con->cid >= con->sess->irq_con_num) + ib_free_cq(con->cq); + else + ib_cq_pool_put(con->cq, con->nr_cqe); con->cq = NULL; return err; } @@ -300,7 +308,10 @@ void rtrs_cq_qp_destroy(struct rtrs_con *con) con->qp = NULL; } if (con->cq) { - ib_cq_pool_put(con->cq, con->nr_cqe); + if (con->cid >= con->sess->irq_con_num) + ib_free_cq(con->cq); + else + ib_cq_pool_put(con->cq, con->nr_cqe); con->cq = NULL; } } -- cgit From 36332ded46b6292296bc7170fada6e238a0802cc Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Wed, 22 Sep 2021 14:53:30 +0200 Subject: RDMA/rtrs: Replace duplicate check with is_pollqueue helper if (con->cid >= con->sess->irq_con_num) check can be replaced with a is_pollqueue helper. Link: https://lore.kernel.org/r/20210922125333.351454-5-haris.iqbal@ionos.com Signed-off-by: Jack Wang Signed-off-by: Md Haris Iqbal Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index 9bc323490ce3..ac83cd97f838 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -222,13 +222,18 @@ static void qp_event_handler(struct ib_event *ev, void *ctx) } } +static bool is_pollqueue(struct rtrs_con *con) +{ + return con->cid >= con->sess->irq_con_num; +} + static int create_cq(struct rtrs_con *con, int cq_vector, int nr_cqe, enum ib_poll_context poll_ctx) { struct rdma_cm_id *cm_id = con->cm_id; struct ib_cq *cq; - if (con->cid >= con->sess->irq_con_num) + if (is_pollqueue(con)) cq = ib_alloc_cq(cm_id->device, con, nr_cqe, cq_vector, poll_ctx); else @@ -288,7 +293,7 @@ int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, err = create_qp(con, sess->dev->ib_pd, max_send_wr, max_recv_wr, max_send_sge); if (err) { - if (con->cid >= con->sess->irq_con_num) + if (is_pollqueue(con)) ib_free_cq(con->cq); else ib_cq_pool_put(con->cq, con->nr_cqe); @@ -308,7 +313,7 @@ void rtrs_cq_qp_destroy(struct rtrs_con *con) con->qp = NULL; } if (con->cq) { - if (con->cid >= con->sess->irq_con_num) + if (is_pollqueue(con)) ib_free_cq(con->cq); else ib_cq_pool_put(con->cq, con->nr_cqe); -- cgit From 6f5649afd3984e35c4b862a05c4511c6d18b27af Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Wed, 22 Sep 2021 14:53:31 +0200 Subject: RDMA/rtrs: Introduce destroy_cq helper The same code snip used twice, to avoid duplicate, replace it with a destroy_cq helper. Link: https://lore.kernel.org/r/20210922125333.351454-6-haris.iqbal@ionos.com Signed-off-by: Jack Wang Signed-off-by: Md Haris Iqbal Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index ac83cd97f838..37952c8e768c 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -279,6 +279,17 @@ static int create_qp(struct rtrs_con *con, struct ib_pd *pd, return ret; } +static void destroy_cq(struct rtrs_con *con) +{ + if (con->cq) { + if (is_pollqueue(con)) + ib_free_cq(con->cq); + else + ib_cq_pool_put(con->cq, con->nr_cqe); + } + con->cq = NULL; +} + int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, u32 max_send_sge, int cq_vector, int nr_cqe, u32 max_send_wr, u32 max_recv_wr, @@ -293,11 +304,7 @@ int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, err = create_qp(con, sess->dev->ib_pd, max_send_wr, max_recv_wr, max_send_sge); if (err) { - if (is_pollqueue(con)) - ib_free_cq(con->cq); - else - ib_cq_pool_put(con->cq, con->nr_cqe); - con->cq = NULL; + destroy_cq(con); return err; } con->sess = sess; @@ -312,13 +319,7 @@ void rtrs_cq_qp_destroy(struct rtrs_con *con) rdma_destroy_qp(con->cm_id); con->qp = NULL; } - if (con->cq) { - if (is_pollqueue(con)) - ib_free_cq(con->cq); - else - ib_cq_pool_put(con->cq, con->nr_cqe); - con->cq = NULL; - } + destroy_cq(con); } EXPORT_SYMBOL_GPL(rtrs_cq_qp_destroy); -- cgit From dea7bb3ad3e08f96815330f88a62c24d7a9dacae Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Wed, 22 Sep 2021 14:53:32 +0200 Subject: RDMA/rtrs: Do not allow sessname to contain special symbols / and . Allowing these characters in sessname can lead to unexpected results, particularly because / is used as a separator between files in a path, and . points to the current directory. Link: https://lore.kernel.org/r/20210922125333.351454-7-haris.iqbal@ionos.com Signed-off-by: Md Haris Iqbal Reviewed-by: Gioh Kim Reviewed-by: Aleksei Marov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 6 ++++++ drivers/infiniband/ulp/rtrs/rtrs-srv.c | 5 +++++ 2 files changed, 11 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index bc8824b4ee0d..15c0077dd27e 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -2788,6 +2788,12 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, struct rtrs_clt *clt; int err, i; + if (strchr(sessname, '/') || strchr(sessname, '.')) { + pr_err("sessname cannot contain / and .\n"); + err = -EINVAL; + goto out; + } + clt = alloc_clt(sessname, paths_num, port, pdu_sz, ops->priv, ops->link_ev, reconnect_delay_sec, diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 078a1cbac90c..7df71f8cf149 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -803,6 +803,11 @@ static int process_info_req(struct rtrs_srv_con *con, return err; } + if (strchr(msg->sessname, '/') || strchr(msg->sessname, '.')) { + rtrs_err(s, "sessname cannot contain / and .\n"); + return -EINVAL; + } + if (exist_sessname(sess->srv->ctx, msg->sessname, &sess->srv->paths_uuid)) { rtrs_err(s, "sessname is duplicated: %s\n", msg->sessname); -- cgit From 3f3fe682f28d60e9a9a0e44cf4fa7a8a920a1d43 Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Wed, 22 Sep 2021 14:53:33 +0200 Subject: RDMA/rtrs-clt: Follow "one entry one value" rule for IO migration stats This commit divides the sysfs entry cpu_migration into 2 different entries One for "from cpus" and the other for "to cpus". Link: https://lore.kernel.org/r/20210922125333.351454-8-haris.iqbal@ionos.com Signed-off-by: Md Haris Iqbal Reviewed-by: Gioh Kim Reviewed-by: Aleksei Marov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c | 27 ++++++++++++++++++--------- drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c | 11 ++++++++--- drivers/infiniband/ulp/rtrs/rtrs-clt.h | 3 ++- 3 files changed, 28 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c index 61d5e0018392..f7e459fe68be 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c @@ -37,29 +37,38 @@ void rtrs_clt_inc_failover_cnt(struct rtrs_clt_stats *stats) s->rdma.failover_cnt++; } -int rtrs_clt_stats_migration_cnt_to_str(struct rtrs_clt_stats *stats, char *buf) +int rtrs_clt_stats_migration_from_cnt_to_str(struct rtrs_clt_stats *stats, char *buf) { struct rtrs_clt_stats_pcpu *s; size_t used; int cpu; - used = sysfs_emit(buf, " "); - for_each_possible_cpu(cpu) - used += sysfs_emit_at(buf, used, " CPU%u", cpu); - - used += sysfs_emit_at(buf, used, "\nfrom:"); + used = 0; for_each_possible_cpu(cpu) { s = per_cpu_ptr(stats->pcpu_stats, cpu); - used += sysfs_emit_at(buf, used, " %d", + used += sysfs_emit_at(buf, used, "%d ", atomic_read(&s->cpu_migr.from)); } - used += sysfs_emit_at(buf, used, "\nto :"); + used += sysfs_emit_at(buf, used, "\n"); + + return used; +} + +int rtrs_clt_stats_migration_to_cnt_to_str(struct rtrs_clt_stats *stats, char *buf) +{ + struct rtrs_clt_stats_pcpu *s; + + size_t used; + int cpu; + + used = 0; for_each_possible_cpu(cpu) { s = per_cpu_ptr(stats->pcpu_stats, cpu); - used += sysfs_emit_at(buf, used, " %d", s->cpu_migr.to); + used += sysfs_emit_at(buf, used, "%d ", s->cpu_migr.to); } + used += sysfs_emit_at(buf, used, "\n"); return used; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index 4ee592ccf979..0e69180c3771 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -296,8 +296,12 @@ static struct kobj_attribute rtrs_clt_remove_path_attr = __ATTR(remove_path, 0644, rtrs_clt_remove_path_show, rtrs_clt_remove_path_store); -STAT_ATTR(struct rtrs_clt_stats, cpu_migration, - rtrs_clt_stats_migration_cnt_to_str, +STAT_ATTR(struct rtrs_clt_stats, cpu_migration_from, + rtrs_clt_stats_migration_from_cnt_to_str, + rtrs_clt_reset_cpu_migr_stats); + +STAT_ATTR(struct rtrs_clt_stats, cpu_migration_to, + rtrs_clt_stats_migration_to_cnt_to_str, rtrs_clt_reset_cpu_migr_stats); STAT_ATTR(struct rtrs_clt_stats, reconnects, @@ -313,7 +317,8 @@ STAT_ATTR(struct rtrs_clt_stats, reset_all, rtrs_clt_reset_all_stats); static struct attribute *rtrs_clt_stats_attrs[] = { - &cpu_migration_attr.attr, + &cpu_migration_from_attr.attr, + &cpu_migration_to_attr.attr, &reconnects_attr.attr, &rdma_attr.attr, &reset_all_attr.attr, diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h index 6d81aae53df4..9afffccff973 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -226,7 +226,8 @@ int rtrs_clt_reset_rdma_lat_distr_stats(struct rtrs_clt_stats *stats, ssize_t rtrs_clt_stats_rdma_lat_distr_to_str(struct rtrs_clt_stats *stats, char *page); int rtrs_clt_reset_cpu_migr_stats(struct rtrs_clt_stats *stats, bool enable); -int rtrs_clt_stats_migration_cnt_to_str(struct rtrs_clt_stats *stats, char *buf); +int rtrs_clt_stats_migration_from_cnt_to_str(struct rtrs_clt_stats *stats, char *buf); +int rtrs_clt_stats_migration_to_cnt_to_str(struct rtrs_clt_stats *stats, char *buf); int rtrs_clt_reset_reconnects_stat(struct rtrs_clt_stats *stats, bool enable); int rtrs_clt_stats_reconnects_to_str(struct rtrs_clt_stats *stats, char *buf); int rtrs_clt_reset_rdma_stats(struct rtrs_clt_stats *stats, bool enable); -- cgit From b68362304bcfc697d755d29f8075ec6f24dece32 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Sun, 26 Sep 2021 11:31:43 +0300 Subject: RDMA/mlx5: Avoid taking MRs from larger MR cache pools when a pool is empty Currently, if a cache entry is empty, the driver will try to take MRs from larger cache entries. This behavior consumes a lot of memory. In addition, when searching for an mkey in an entry, the entry is locked. When using a multithreaded application with the old behavior, the threads will block each other more often, which can hurt performance as can be seen in the table below. Therefore, avoid it by creating a new mkey when the requested cache entry is empty. The test was performed on a machine with Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz 44 cores. Here are the time measures for allocating MRs of 2^6 pages. The search in the cache started from entry 6. +------------+---------------------+---------------------+ | | Old behavior | New behavior | | +----------+----------+----------+----------+ | | 1 thread | 5 thread | 1 thread | 5 thread | +============+==========+==========+==========+==========+ | 1,000 MRs | 14 ms | 30 ms | 14 ms | 80 ms | +------------+----------+----------+----------+----------+ | 10,000 MRs | 135 ms | 6 sec | 173 ms | 880 ms | +------------+----------+----------+----------+----------+ |100,000 MRs | 11.2 sec | 57 sec | 1.74 sec | 8.8 sec | +------------+----------+----------+----------+----------+ Link: https://lore.kernel.org/r/71af2770c737b936f7b10f457f0ef303ffcf7ad7.1632644527.git.leonro@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3be36ebbf67a..b4d2322e9ca5 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -600,29 +600,21 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, /* Return a MR already available in the cache */ static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent) { - struct mlx5_ib_dev *dev = req_ent->dev; struct mlx5_ib_mr *mr = NULL; struct mlx5_cache_ent *ent = req_ent; - /* Try larger MR pools from the cache to satisfy the allocation */ - for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) { - mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order, - ent - dev->cache.ent); - - spin_lock_irq(&ent->lock); - if (!list_empty(&ent->head)) { - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, - list); - list_del(&mr->list); - ent->available_mrs--; - queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); - mlx5_clear_mr(mr); - return mr; - } + spin_lock_irq(&ent->lock); + if (!list_empty(&ent->head)) { + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + list_del(&mr->list); + ent->available_mrs--; queue_adjust_cache_locked(ent); spin_unlock_irq(&ent->lock); + mlx5_clear_mr(mr); + return mr; } + queue_adjust_cache_locked(ent); + spin_unlock_irq(&ent->lock); req_ent->miss++; return NULL; } -- cgit From 8e913a8d89cd91fcc22b496b8329b0b093a6dec9 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 1 Oct 2021 15:32:15 -0600 Subject: RDMA/rw: switch to dma_map_sgtable() There are a couple of subtle error path bugs related to mapping the sgls: - In rdma_rw_ctx_init(), dma_unmap would be called with an sg that could have been incremented from the original call, as well as an nents that is the dma mapped entries not the original number of nents called when mapped. - Similarly in rdma_rw_ctx_signature_init, both sg and prot_sg were unmapped with the incorrect number of nents. To fix this, switch to the sgtable interface for mapping which conveniently stores the original nents for unmapping. This will get cleaned up further once the dma mapping interface supports P2PDMA and pci_p2pdma_map_sg() can be removed. Fixes: 0e353e34e1e7 ("IB/core: add RW API support for signature MRs") Fixes: a060b5629ab0 ("IB/core: generic RDMA READ/WRITE API") Link: https://lore.kernel.org/r/20211001213215.3761-1-logang@deltatee.com Signed-off-by: Logan Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rw.c | 66 +++++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 25 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 5221cce65675..5a3bd41b331c 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -282,15 +282,22 @@ static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg, ib_dma_unmap_sg(dev, sg, sg_cnt, dir); } -static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg, - u32 sg_cnt, enum dma_data_direction dir) +static int rdma_rw_map_sgtable(struct ib_device *dev, struct sg_table *sgt, + enum dma_data_direction dir) { - if (is_pci_p2pdma_page(sg_page(sg))) { + int nents; + + if (is_pci_p2pdma_page(sg_page(sgt->sgl))) { if (WARN_ON_ONCE(ib_uses_virt_dma(dev))) return 0; - return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir); + nents = pci_p2pdma_map_sg(dev->dma_device, sgt->sgl, + sgt->orig_nents, dir); + if (!nents) + return -EIO; + sgt->nents = nents; + return 0; } - return ib_dma_map_sg(dev, sg, sg_cnt, dir); + return ib_dma_map_sgtable_attrs(dev, sgt, dir, 0); } /** @@ -313,12 +320,16 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, u64 remote_addr, u32 rkey, enum dma_data_direction dir) { struct ib_device *dev = qp->pd->device; + struct sg_table sgt = { + .sgl = sg, + .orig_nents = sg_cnt, + }; int ret; - ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir); - if (!ret) - return -ENOMEM; - sg_cnt = ret; + ret = rdma_rw_map_sgtable(dev, &sgt, dir); + if (ret) + return ret; + sg_cnt = sgt.nents; /* * Skip to the S/G entry that sg_offset falls into: @@ -354,7 +365,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, return ret; out_unmap_sg: - rdma_rw_unmap_sg(dev, sg, sg_cnt, dir); + rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir); return ret; } EXPORT_SYMBOL(rdma_rw_ctx_init); @@ -385,6 +396,14 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, struct ib_device *dev = qp->pd->device; u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device, qp->integrity_en); + struct sg_table sgt = { + .sgl = sg, + .orig_nents = sg_cnt, + }; + struct sg_table prot_sgt = { + .sgl = prot_sg, + .orig_nents = prot_sg_cnt, + }; struct ib_rdma_wr *rdma_wr; int count = 0, ret; @@ -394,18 +413,14 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, return -EINVAL; } - ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir); - if (!ret) - return -ENOMEM; - sg_cnt = ret; + ret = rdma_rw_map_sgtable(dev, &sgt, dir); + if (ret) + return ret; if (prot_sg_cnt) { - ret = rdma_rw_map_sg(dev, prot_sg, prot_sg_cnt, dir); - if (!ret) { - ret = -ENOMEM; + ret = rdma_rw_map_sgtable(dev, &prot_sgt, dir); + if (ret) goto out_unmap_sg; - } - prot_sg_cnt = ret; } ctx->type = RDMA_RW_SIG_MR; @@ -426,10 +441,11 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, memcpy(ctx->reg->mr->sig_attrs, sig_attrs, sizeof(struct ib_sig_attrs)); - ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sg_cnt, NULL, prot_sg, - prot_sg_cnt, NULL, SZ_4K); + ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sgt.nents, NULL, prot_sg, + prot_sgt.nents, NULL, SZ_4K); if (unlikely(ret)) { - pr_err("failed to map PI sg (%u)\n", sg_cnt + prot_sg_cnt); + pr_err("failed to map PI sg (%u)\n", + sgt.nents + prot_sgt.nents); goto out_destroy_sig_mr; } @@ -468,10 +484,10 @@ out_destroy_sig_mr: out_free_ctx: kfree(ctx->reg); out_unmap_prot_sg: - if (prot_sg_cnt) - rdma_rw_unmap_sg(dev, prot_sg, prot_sg_cnt, dir); + if (prot_sgt.nents) + rdma_rw_unmap_sg(dev, prot_sgt.sgl, prot_sgt.orig_nents, dir); out_unmap_sg: - rdma_rw_unmap_sg(dev, sg, sg_cnt, dir); + rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir); return ret; } EXPORT_SYMBOL(rdma_rw_ctx_signature_init); -- cgit From 286dba65a4a65a6d5de011767061f6ffa6e10389 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 1 Oct 2021 15:31:53 +0300 Subject: IB/hf1: Use string_upper() instead of an open coded variant Use string_upper() from the string helper module instead of an open coded variant. Link: https://lore.kernel.org/r/20211001123153.67379-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/efivar.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c index f275dd1abed8..e8ed05516bf2 100644 --- a/drivers/infiniband/hw/hfi1/efivar.c +++ b/drivers/infiniband/hw/hfi1/efivar.c @@ -3,7 +3,9 @@ * Copyright(c) 2015, 2016 Intel Corporation. */ -#include +#include +#include + #include "efivar.h" /* GUID for HFI1 variables in EFI */ @@ -112,7 +114,6 @@ int read_hfi1_efi_var(struct hfi1_devdata *dd, const char *kind, char prefix_name[64]; char name[64]; int result; - int i; /* create a common prefix */ snprintf(prefix_name, sizeof(prefix_name), "%04x:%02x:%02x.%x", @@ -128,10 +129,7 @@ int read_hfi1_efi_var(struct hfi1_devdata *dd, const char *kind, * variable. */ if (result) { - /* Converting to uppercase */ - for (i = 0; prefix_name[i]; i++) - if (isalpha(prefix_name[i])) - prefix_name[i] = toupper(prefix_name[i]); + string_upper(prefix_name, prefix_name); snprintf(name, sizeof(name), "%s-%s", prefix_name, kind); result = read_efi_var(name, size, return_data); } -- cgit From 0de71d7adaf03b5568d3bd34c7409d97274a4542 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Wed, 6 Oct 2021 16:15:31 -0400 Subject: RDMA/irdma: Delete unused struct irdma_bth The struct irdma_bth is not used, so remove it. Link: https://lore.kernel.org/r/20211006201531.469650-1-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Acked-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/cm.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/cm.h b/drivers/infiniband/hw/irdma/cm.h index d03cd29333ea..1fbe72e18625 100644 --- a/drivers/infiniband/hw/irdma/cm.h +++ b/drivers/infiniband/hw/irdma/cm.h @@ -159,14 +159,6 @@ enum irdma_cm_event_type { IRDMA_CM_EVENT_ABORTED, }; -struct irdma_bth { /* Base Trasnport Header */ - u8 opcode; - u8 flags; - __be16 pkey; - __be32 qpn; - __be32 apsn; -}; - struct ietf_mpa_v1 { u8 key[IETF_MPA_KEY_SIZE]; u8 flags; -- cgit From 1cf2ce8272802e677398fab47a73713bc6e1fd5c Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Thu, 30 Sep 2021 17:48:10 +0800 Subject: RDMA/rxe: Remove the is_user members of struct rxe_sq/rxe_rq/rxe_srq The is_user members of struct rxe_sq/rxe_rq/rxe_srq are unsed since commit ae6e843fe08d ("RDMA/rxe: Add memory barriers to kernel queues"). In this case, it is fine to remove them directly. Link: https://lore.kernel.org/r/20210930094813.226888-2-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 2 -- drivers/infiniband/sw/rxe/rxe_srq.c | 1 - drivers/infiniband/sw/rxe/rxe_verbs.c | 3 --- drivers/infiniband/sw/rxe/rxe_verbs.h | 3 --- 4 files changed, 9 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index c8f4790083d2..975321812c87 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -307,8 +307,6 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, spin_lock_init(&qp->rq.producer_lock); spin_lock_init(&qp->rq.consumer_lock); - qp->rq.is_user = qp->is_user; - skb_queue_head_init(&qp->resp_pkts); rxe_init_task(rxe, &qp->resp.task, qp, diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index a9e7817e2732..eb1c4c3b3a78 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -86,7 +86,6 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, srq->srq_num = srq->pelem.index; srq->rq.max_wr = init->attr.max_wr; srq->rq.max_sge = init->attr.max_sge; - srq->rq.is_user = srq->is_user; srq_wqe_size = rcv_wqe_size(srq->rq.max_sge); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 9d0bb9aa7514..c49ba0381964 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -267,9 +267,6 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, if (udata->outlen < sizeof(*uresp)) return -EINVAL; uresp = udata->outbuf; - srq->is_user = true; - } else { - srq->is_user = false; } err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index c807639435eb..962bb2481a28 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -77,7 +77,6 @@ enum wqe_state { }; struct rxe_sq { - bool is_user; int max_wr; int max_sge; int max_inline; @@ -86,7 +85,6 @@ struct rxe_sq { }; struct rxe_rq { - bool is_user; int max_wr; int max_sge; spinlock_t producer_lock; /* guard queue producer */ @@ -100,7 +98,6 @@ struct rxe_srq { struct rxe_pd *pd; struct rxe_rq rq; u32 srq_num; - bool is_user; int limit; int error; -- cgit From 609bb8c3a3f5652d5fb66373c6e539ae6bd6be2f Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Thu, 30 Sep 2021 17:48:11 +0800 Subject: RDMA/rxe: Change the is_user member of struct rxe_cq to bool Make the is_user members of struct rxe_qp/rxe_cq has the same type. Link: https://lore.kernel.org/r/20210930094813.226888-3-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_cq.c | 3 +-- drivers/infiniband/sw/rxe/rxe_verbs.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index 4eedaa0244b3..6848426c074f 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -77,8 +77,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, return err; } - if (uresp) - cq->is_user = 1; + cq->is_user = uresp; cq->is_dying = false; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 962bb2481a28..098fde693dbd 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -64,7 +64,7 @@ struct rxe_cq { spinlock_t cq_lock; u8 notify; bool is_dying; - int is_user; + bool is_user; struct tasklet_struct comp_task; }; -- cgit From 262d9fcf8530c8a64b72951197bfab44cb0d8c62 Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Thu, 30 Sep 2021 17:48:12 +0800 Subject: RDMA/rxe: Set partial attributes when completion status != IBV_WC_SUCCESS As ibv_poll_cq()'s manual said, only partial attributes are valid when completion status != IBV_WC_SUCCESS. Link: https://lore.kernel.org/r/20210930094813.226888-4-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_comp.c | 45 ++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 48a3864ada29..d771ba8449a1 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -380,30 +380,35 @@ static inline enum comp_state do_atomic(struct rxe_qp *qp, static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe, struct rxe_cqe *cqe) { + struct ib_wc *wc = &cqe->ibwc; + struct ib_uverbs_wc *uwc = &cqe->uibwc; + memset(cqe, 0, sizeof(*cqe)); if (!qp->is_user) { - struct ib_wc *wc = &cqe->ibwc; - - wc->wr_id = wqe->wr.wr_id; - wc->status = wqe->status; - wc->opcode = wr_to_wc_opcode(wqe->wr.opcode); - if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || - wqe->wr.opcode == IB_WR_SEND_WITH_IMM) - wc->wc_flags = IB_WC_WITH_IMM; - wc->byte_len = wqe->dma.length; - wc->qp = &qp->ibqp; + wc->wr_id = wqe->wr.wr_id; + wc->status = wqe->status; + wc->qp = &qp->ibqp; } else { - struct ib_uverbs_wc *uwc = &cqe->uibwc; - - uwc->wr_id = wqe->wr.wr_id; - uwc->status = wqe->status; - uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode); - if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || - wqe->wr.opcode == IB_WR_SEND_WITH_IMM) - uwc->wc_flags = IB_WC_WITH_IMM; - uwc->byte_len = wqe->dma.length; - uwc->qp_num = qp->ibqp.qp_num; + uwc->wr_id = wqe->wr.wr_id; + uwc->status = wqe->status; + uwc->qp_num = qp->ibqp.qp_num; + } + + if (wqe->status == IB_WC_SUCCESS) { + if (!qp->is_user) { + wc->opcode = wr_to_wc_opcode(wqe->wr.opcode); + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + wc->wc_flags = IB_WC_WITH_IMM; + wc->byte_len = wqe->dma.length; + } else { + uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode); + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + uwc->wc_flags = IB_WC_WITH_IMM; + uwc->byte_len = wqe->dma.length; + } } } -- cgit From 115fda3509e782c376467e43f9d490b7f7884daa Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Thu, 30 Sep 2021 17:48:13 +0800 Subject: RDMA/rxe: Remove duplicate settings Remove duplicate settings for vendor_err and qp_num. Link: https://lore.kernel.org/r/20210930094813.226888-5-yangx.jy@fujitsu.com Signed-off-by: Xiao Yang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 4af0dc95784a..e8f435fa6e4d 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -860,7 +860,6 @@ static enum resp_states do_complete(struct rxe_qp *qp, wc->opcode = (pkt->mask & RXE_IMMDT_MASK && pkt->mask & RXE_WRITE_MASK) ? IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV; - wc->vendor_err = 0; wc->byte_len = (pkt->mask & RXE_IMMDT_MASK && pkt->mask & RXE_WRITE_MASK) ? qp->resp.length : wqe->dma.length - wqe->dma.resid; @@ -881,8 +880,6 @@ static enum resp_states do_complete(struct rxe_qp *qp, uwc->ex.invalidate_rkey = ieth_rkey(pkt); } - uwc->qp_num = qp->ibqp.qp_num; - if (pkt->mask & RXE_DETH_MASK) uwc->src_qp = deth_sqp(pkt); @@ -914,7 +911,6 @@ static enum resp_states do_complete(struct rxe_qp *qp, if (pkt->mask & RXE_DETH_MASK) wc->src_qp = deth_sqp(pkt); - wc->qp = &qp->ibqp; wc->port_num = qp->attr.port_num; } } -- cgit From 2a152512a155aaf27c3e67834ffafaed9525a7b5 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 3 Oct 2021 13:56:04 +0300 Subject: RDMA/efa: CQ notifications This patch adds support for CQ notifications through the standard verbs api. In order to achieve that, a new event queue (EQ) object is introduced, which is in charge of reporting completion events to the driver. On driver load, EQs are allocated and their affinity is set to a single cpu. When a user app creates a CQ with a completion channel, the completion vector number is converted to a EQ number, which is in charge of reporting the CQ events. In addition, the CQ creation admin command now returns an offset for the CQ doorbell, which is mapped to the userspace provider and is used to arm the CQ when requested by the user. The EQs use a single doorbell (located on the registers BAR), which encodes the EQ number and arm as part of the doorbell value. The EQs are polled by the driver on each new EQE, and arm it when the poll is completed. Link: https://lore.kernel.org/r/20211003105605.29222-1-galpress@amazon.com Reviewed-by: Firas JahJah Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa.h | 19 ++- drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 100 ++++++++++++- drivers/infiniband/hw/efa/efa_admin_defs.h | 41 ++++++ drivers/infiniband/hw/efa/efa_com.c | 164 +++++++++++++++++++++ drivers/infiniband/hw/efa/efa_com.h | 38 ++++- drivers/infiniband/hw/efa/efa_com_cmd.c | 35 +++-- drivers/infiniband/hw/efa/efa_com_cmd.h | 10 +- drivers/infiniband/hw/efa/efa_main.c | 181 ++++++++++++++++++++---- drivers/infiniband/hw/efa/efa_regs_defs.h | 7 +- drivers/infiniband/hw/efa/efa_verbs.c | 67 ++++++++- 10 files changed, 610 insertions(+), 52 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index 87b1dadeb7fe..e0c9ba4c70f4 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -20,14 +20,14 @@ #define EFA_IRQNAME_SIZE 40 -/* 1 for AENQ + ADMIN */ -#define EFA_NUM_MSIX_VEC 1 #define EFA_MGMNT_MSIX_VEC_IDX 0 +#define EFA_COMP_EQS_VEC_BASE 1 struct efa_irq { irq_handler_t handler; void *data; u32 irqn; + u32 vector; cpumask_t affinity_hint_mask; char name[EFA_IRQNAME_SIZE]; }; @@ -61,6 +61,13 @@ struct efa_dev { struct efa_irq admin_irq; struct efa_stats stats; + + /* Array of completion EQs */ + struct efa_eq *eqs; + unsigned int neqs; + + /* Only stores CQs with interrupts enabled */ + struct xarray cqs_xa; }; struct efa_ucontext { @@ -84,8 +91,11 @@ struct efa_cq { dma_addr_t dma_addr; void *cpu_addr; struct rdma_user_mmap_entry *mmap_entry; + struct rdma_user_mmap_entry *db_mmap_entry; size_t size; u16 cq_idx; + /* NULL when no interrupts requested */ + struct efa_eq *eq; }; struct efa_qp { @@ -116,6 +126,11 @@ struct efa_ah { u8 id[EFA_GID_SIZE]; }; +struct efa_eq { + struct efa_com_eq eeq; + struct efa_irq irq; +}; + int efa_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index fa38b34eddb8..0b0b93b529f3 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -28,7 +28,9 @@ enum efa_admin_aq_opcode { EFA_ADMIN_DEALLOC_PD = 15, EFA_ADMIN_ALLOC_UAR = 16, EFA_ADMIN_DEALLOC_UAR = 17, - EFA_ADMIN_MAX_OPCODE = 17, + EFA_ADMIN_CREATE_EQ = 18, + EFA_ADMIN_DESTROY_EQ = 19, + EFA_ADMIN_MAX_OPCODE = 19, }; enum efa_admin_aq_feature_id { @@ -38,6 +40,7 @@ enum efa_admin_aq_feature_id { EFA_ADMIN_QUEUE_ATTR = 4, EFA_ADMIN_HW_HINTS = 5, EFA_ADMIN_HOST_INFO = 6, + EFA_ADMIN_EVENT_QUEUE_ATTR = 7, }; /* QP transport type */ @@ -430,8 +433,8 @@ struct efa_admin_create_cq_cmd { /* * 4:0 : reserved5 - MBZ * 5 : interrupt_mode_enabled - if set, cq operates - * in interrupt mode (i.e. CQ events and MSI-X are - * generated), otherwise - polling + * in interrupt mode (i.e. CQ events and EQ elements + * are generated), otherwise - polling * 6 : virt - If set, ring base address is virtual * (IOVA returned by MR registration) * 7 : reserved6 - MBZ @@ -448,8 +451,11 @@ struct efa_admin_create_cq_cmd { /* completion queue depth in # of entries. must be power of 2 */ u16 cq_depth; - /* msix vector assigned to this cq */ - u32 msix_vector_idx; + /* EQ number assigned to this cq */ + u16 eqn; + + /* MBZ */ + u16 reserved; /* * CQ ring base address, virtual or physical depending on 'virt' @@ -480,6 +486,15 @@ struct efa_admin_create_cq_resp { /* actual cq depth in number of entries */ u16 cq_actual_depth; + + /* CQ doorbell address, as offset to PCIe DB BAR */ + u32 db_offset; + + /* + * 0 : db_valid - If set, doorbell offset is valid. + * Always set when interrupts are requested. + */ + u32 flags; }; struct efa_admin_destroy_cq_cmd { @@ -669,6 +684,17 @@ struct efa_admin_feature_queue_attr_desc { u16 max_tx_batch; }; +struct efa_admin_event_queue_attr_desc { + /* The maximum number of event queues supported */ + u32 max_eq; + + /* Maximum number of EQEs per Event Queue */ + u32 max_eq_depth; + + /* Supported events bitmask */ + u32 event_bitmask; +}; + struct efa_admin_feature_aenq_desc { /* bitmask for AENQ groups the device can report */ u32 supported_groups; @@ -727,6 +753,8 @@ struct efa_admin_get_feature_resp { struct efa_admin_feature_queue_attr_desc queue_attr; + struct efa_admin_event_queue_attr_desc event_queue_attr; + struct efa_admin_hw_hints hw_hints; } u; }; @@ -810,6 +838,60 @@ struct efa_admin_dealloc_uar_resp { struct efa_admin_acq_common_desc acq_common_desc; }; +struct efa_admin_create_eq_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; + + /* Size of the EQ in entries, must be power of 2 */ + u16 depth; + + /* MSI-X table entry index */ + u8 msix_vec; + + /* + * 4:0 : entry_size_words - size of EQ entry in + * 32-bit words + * 7:5 : reserved - MBZ + */ + u8 caps; + + /* EQ ring base address */ + struct efa_common_mem_addr ba; + + /* + * Enabled events on this EQ + * 0 : completion_events - Enable completion events + * 31:1 : reserved - MBZ + */ + u32 event_bitmask; + + /* MBZ */ + u32 reserved; +}; + +struct efa_admin_create_eq_resp { + struct efa_admin_acq_common_desc acq_common_desc; + + /* EQ number */ + u16 eqn; + + /* MBZ */ + u16 reserved; +}; + +struct efa_admin_destroy_eq_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; + + /* EQ number */ + u16 eqn; + + /* MBZ */ + u16 reserved; +}; + +struct efa_admin_destroy_eq_resp { + struct efa_admin_acq_common_desc acq_common_desc; +}; + /* asynchronous event notification groups */ enum efa_admin_aenq_group { EFA_ADMIN_FATAL_ERROR = 1, @@ -899,10 +981,18 @@ struct efa_admin_host_info { #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6) #define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) +/* create_cq_resp */ +#define EFA_ADMIN_CREATE_CQ_RESP_DB_VALID_MASK BIT(0) + /* feature_device_attr_desc */ #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1) +/* create_eq_cmd */ +#define EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) +#define EFA_ADMIN_CREATE_EQ_CMD_VIRT_MASK BIT(6) +#define EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS_MASK BIT(0) + /* host_info */ #define EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE_MASK GENMASK(7, 0) #define EFA_ADMIN_HOST_INFO_DRIVER_SUB_MINOR_MASK GENMASK(15, 8) diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h index 78ff9389ae25..83f20c38a840 100644 --- a/drivers/infiniband/hw/efa/efa_admin_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_defs.h @@ -118,6 +118,43 @@ struct efa_admin_aenq_entry { u32 inline_data_w4[12]; }; +enum efa_admin_eqe_event_type { + EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION = 0, +}; + +/* Completion event */ +struct efa_admin_comp_event { + /* CQ number */ + u16 cqn; + + /* MBZ */ + u16 reserved; + + /* MBZ */ + u32 reserved2; +}; + +/* Event Queue Element */ +struct efa_admin_eqe { + /* + * 0 : phase + * 8:1 : event_type - Event type + * 31:9 : reserved - MBZ + */ + u32 common; + + /* MBZ */ + u32 reserved; + + union { + /* Event data */ + u32 event_data[2]; + + /* Completion Event */ + struct efa_admin_comp_event comp_event; + } u; +}; + /* aq_common_desc */ #define EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) #define EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0) @@ -131,4 +168,8 @@ struct efa_admin_aenq_entry { /* aenq_common_desc */ #define EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) +/* eqe */ +#define EFA_ADMIN_EQE_PHASE_MASK BIT(0) +#define EFA_ADMIN_EQE_EVENT_TYPE_MASK GENMASK(8, 1) + #endif /* _EFA_ADMIN_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 0d523ad736c7..16a24a05fc2a 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -56,11 +56,19 @@ static const char *efa_com_cmd_str(u8 cmd) EFA_CMD_STR_CASE(DEALLOC_PD); EFA_CMD_STR_CASE(ALLOC_UAR); EFA_CMD_STR_CASE(DEALLOC_UAR); + EFA_CMD_STR_CASE(CREATE_EQ); + EFA_CMD_STR_CASE(DESTROY_EQ); default: return "unknown command opcode"; } #undef EFA_CMD_STR_CASE } +void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low) +{ + *addr_low = lower_32_bits(addr); + *addr_high = upper_32_bits(addr); +} + static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset) { struct efa_com_mmio_read *mmio_read = &edev->mmio_read; @@ -1081,3 +1089,159 @@ int efa_com_dev_reset(struct efa_com_dev *edev, return 0; } + +static int efa_com_create_eq(struct efa_com_dev *edev, + struct efa_com_create_eq_params *params, + struct efa_com_create_eq_result *result) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_create_eq_resp resp = {}; + struct efa_admin_create_eq_cmd cmd = {}; + int err; + + cmd.aq_common_descriptor.opcode = EFA_ADMIN_CREATE_EQ; + EFA_SET(&cmd.caps, EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS, + params->entry_size_in_bytes / 4); + cmd.depth = params->depth; + cmd.event_bitmask = params->event_bitmask; + cmd.msix_vec = params->msix_vec; + + efa_com_set_dma_addr(params->dma_addr, &cmd.ba.mem_addr_high, + &cmd.ba.mem_addr_low); + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + if (err) { + ibdev_err_ratelimited(edev->efa_dev, + "Failed to create eq[%d]\n", err); + return err; + } + + result->eqn = resp.eqn; + + return 0; +} + +static void efa_com_destroy_eq(struct efa_com_dev *edev, + struct efa_com_destroy_eq_params *params) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_destroy_eq_resp resp = {}; + struct efa_admin_destroy_eq_cmd cmd = {}; + int err; + + cmd.aq_common_descriptor.opcode = EFA_ADMIN_DESTROY_EQ; + cmd.eqn = params->eqn; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + if (err) + ibdev_err_ratelimited(edev->efa_dev, + "Failed to destroy EQ-%u [%d]\n", cmd.eqn, + err); +} + +static void efa_com_arm_eq(struct efa_com_dev *edev, struct efa_com_eq *eeq) +{ + u32 val = 0; + + EFA_SET(&val, EFA_REGS_EQ_DB_EQN, eeq->eqn); + EFA_SET(&val, EFA_REGS_EQ_DB_ARM, 1); + + writel(val, edev->reg_bar + EFA_REGS_EQ_DB_OFF); +} + +void efa_com_eq_comp_intr_handler(struct efa_com_dev *edev, + struct efa_com_eq *eeq) +{ + struct efa_admin_eqe *eqe; + u32 processed = 0; + u8 phase; + u32 ci; + + ci = eeq->cc & (eeq->depth - 1); + phase = eeq->phase; + eqe = &eeq->eqes[ci]; + + /* Go over all the events */ + while ((READ_ONCE(eqe->common) & EFA_ADMIN_EQE_PHASE_MASK) == phase) { + /* + * Do not read the rest of the completion entry before the + * phase bit was validated + */ + dma_rmb(); + + eeq->cb(eeq, eqe); + + /* Get next event entry */ + ci++; + processed++; + + if (ci == eeq->depth) { + ci = 0; + phase = !phase; + } + + eqe = &eeq->eqes[ci]; + } + + eeq->cc += processed; + eeq->phase = phase; + efa_com_arm_eq(eeq->edev, eeq); +} + +void efa_com_eq_destroy(struct efa_com_dev *edev, struct efa_com_eq *eeq) +{ + struct efa_com_destroy_eq_params params = { + .eqn = eeq->eqn, + }; + + efa_com_destroy_eq(edev, ¶ms); + dma_free_coherent(edev->dmadev, eeq->depth * sizeof(*eeq->eqes), + eeq->eqes, eeq->dma_addr); +} + +int efa_com_eq_init(struct efa_com_dev *edev, struct efa_com_eq *eeq, + efa_eqe_handler cb, u16 depth, u8 msix_vec) +{ + struct efa_com_create_eq_params params = {}; + struct efa_com_create_eq_result result = {}; + int err; + + params.depth = depth; + params.entry_size_in_bytes = sizeof(*eeq->eqes); + EFA_SET(¶ms.event_bitmask, + EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS, 1); + params.msix_vec = msix_vec; + + eeq->eqes = dma_alloc_coherent(edev->dmadev, + params.depth * sizeof(*eeq->eqes), + ¶ms.dma_addr, GFP_KERNEL); + if (!eeq->eqes) + return -ENOMEM; + + err = efa_com_create_eq(edev, ¶ms, &result); + if (err) + goto err_free_coherent; + + eeq->eqn = result.eqn; + eeq->edev = edev; + eeq->dma_addr = params.dma_addr; + eeq->phase = 1; + eeq->depth = params.depth; + eeq->cb = cb; + efa_com_arm_eq(edev, eeq); + + return 0; + +err_free_coherent: + dma_free_coherent(edev->dmadev, params.depth * sizeof(*eeq->eqes), + eeq->eqes, params.dma_addr); + return err; +} diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h index 5e4c88877ddb..77282234ce68 100644 --- a/drivers/infiniband/hw/efa/efa_com.h +++ b/drivers/infiniband/hw/efa/efa_com.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_COM_H_ @@ -80,6 +80,9 @@ struct efa_com_admin_queue { }; struct efa_aenq_handlers; +struct efa_com_eq; +typedef void (*efa_eqe_handler)(struct efa_com_eq *eeq, + struct efa_admin_eqe *eqe); struct efa_com_aenq { struct efa_admin_aenq_entry *entries; @@ -112,6 +115,33 @@ struct efa_com_dev { struct efa_com_mmio_read mmio_read; }; +struct efa_com_eq { + struct efa_com_dev *edev; + struct efa_admin_eqe *eqes; + dma_addr_t dma_addr; + u32 cc; /* Consumer counter */ + u16 eqn; + u16 depth; + u8 phase; + efa_eqe_handler cb; +}; + +struct efa_com_create_eq_params { + dma_addr_t dma_addr; + u32 event_bitmask; + u16 depth; + u8 entry_size_in_bytes; + u8 msix_vec; +}; + +struct efa_com_create_eq_result { + u16 eqn; +}; + +struct efa_com_destroy_eq_params { + u16 eqn; +}; + typedef void (*efa_aenq_handler)(void *data, struct efa_admin_aenq_entry *aenq_e); @@ -121,9 +151,13 @@ struct efa_aenq_handlers { efa_aenq_handler unimplemented_handler; }; +void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low); int efa_com_admin_init(struct efa_com_dev *edev, struct efa_aenq_handlers *aenq_handlers); void efa_com_admin_destroy(struct efa_com_dev *edev); +int efa_com_eq_init(struct efa_com_dev *edev, struct efa_com_eq *eeq, + efa_eqe_handler cb, u16 depth, u8 msix_vec); +void efa_com_eq_destroy(struct efa_com_dev *edev, struct efa_com_eq *eeq); int efa_com_dev_reset(struct efa_com_dev *edev, enum efa_regs_reset_reason_types reset_reason); void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling); @@ -140,5 +174,7 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, struct efa_admin_acq_entry *comp, size_t comp_size); void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data); +void efa_com_eq_comp_intr_handler(struct efa_com_dev *edev, + struct efa_com_eq *eeq); #endif /* _EFA_COM_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index f752ef64159c..fb405da4e1db 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -1,17 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include "efa_com.h" #include "efa_com_cmd.h" -void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low) -{ - *addr_low = lower_32_bits(addr); - *addr_high = upper_32_bits(addr); -} - int efa_com_create_qp(struct efa_com_dev *edev, struct efa_com_create_qp_params *params, struct efa_com_create_qp_result *res) @@ -157,7 +151,7 @@ int efa_com_create_cq(struct efa_com_dev *edev, struct efa_com_create_cq_params *params, struct efa_com_create_cq_result *result) { - struct efa_admin_create_cq_resp cmd_completion; + struct efa_admin_create_cq_resp cmd_completion = {}; struct efa_admin_create_cq_cmd create_cmd = {}; struct efa_com_admin_queue *aq = &edev->aq; int err; @@ -169,6 +163,11 @@ int efa_com_create_cq(struct efa_com_dev *edev, create_cmd.cq_depth = params->cq_depth; create_cmd.num_sub_cqs = params->num_sub_cqs; create_cmd.uar = params->uarn; + if (params->interrupt_mode_enabled) { + EFA_SET(&create_cmd.cq_caps_1, + EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED, 1); + create_cmd.eqn = params->eqn; + } efa_com_set_dma_addr(params->dma_addr, &create_cmd.cq_ba.mem_addr_high, @@ -187,6 +186,9 @@ int efa_com_create_cq(struct efa_com_dev *edev, result->cq_idx = cmd_completion.cq_idx; result->actual_depth = params->cq_depth; + result->db_off = cmd_completion.db_offset; + result->db_valid = EFA_GET(&cmd_completion.flags, + EFA_ADMIN_CREATE_CQ_RESP_DB_VALID); return 0; } @@ -497,6 +499,23 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, sizeof(resp.u.network_attr.addr)); result->mtu = resp.u.network_attr.mtu; + if (efa_com_check_supported_feature_id(edev, + EFA_ADMIN_EVENT_QUEUE_ATTR)) { + err = efa_com_get_feature(edev, &resp, + EFA_ADMIN_EVENT_QUEUE_ATTR); + if (err) { + ibdev_err_ratelimited( + edev->efa_dev, + "Failed to get event queue attributes %d\n", + err); + return err; + } + + result->max_eq = resp.u.event_queue_attr.max_eq; + result->max_eq_depth = resp.u.event_queue_attr.max_eq_depth; + result->event_bitmask = resp.u.event_queue_attr.event_bitmask; + } + return 0; } diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index eea4ebfbe6ec..c33010bbf9e8 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_COM_CMD_H_ @@ -73,7 +73,9 @@ struct efa_com_create_cq_params { u16 cq_depth; u16 num_sub_cqs; u16 uarn; + u16 eqn; u8 entry_size_in_bytes; + bool interrupt_mode_enabled; }; struct efa_com_create_cq_result { @@ -81,6 +83,8 @@ struct efa_com_create_cq_result { u16 cq_idx; /* actual cq depth in # of entries */ u16 actual_depth; + u32 db_off; + bool db_valid; }; struct efa_com_destroy_cq_params { @@ -125,6 +129,9 @@ struct efa_com_get_device_attr_result { u32 max_llq_size; u32 max_rdma_size; u32 device_caps; + u32 max_eq; + u32 max_eq_depth; + u32 event_bitmask; /* EQ events bitmask */ u16 sub_cqs_per_cq; u16 max_sq_sge; u16 max_rq_sge; @@ -260,7 +267,6 @@ union efa_com_get_stats_result { struct efa_com_rdma_read_stats rdma_read_stats; }; -void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low); int efa_com_create_qp(struct efa_com_dev *edev, struct efa_com_create_qp_params *params, struct efa_com_create_qp_result *res); diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 417dea5f90cf..d2a445f6f8e5 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -67,6 +67,47 @@ static void efa_release_bars(struct efa_dev *dev, int bars_mask) pci_release_selected_regions(pdev, release_bars); } +static void efa_process_comp_eqe(struct efa_dev *dev, struct efa_admin_eqe *eqe) +{ + u16 cqn = eqe->u.comp_event.cqn; + struct efa_cq *cq; + + /* Safe to load as we're in irq and removal calls synchronize_irq() */ + cq = xa_load(&dev->cqs_xa, cqn); + if (unlikely(!cq)) { + ibdev_err_ratelimited(&dev->ibdev, + "Completion event on non-existent CQ[%u]", + cqn); + return; + } + + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); +} + +static void efa_process_eqe(struct efa_com_eq *eeq, struct efa_admin_eqe *eqe) +{ + struct efa_dev *dev = container_of(eeq->edev, struct efa_dev, edev); + + if (likely(EFA_GET(&eqe->common, EFA_ADMIN_EQE_EVENT_TYPE) == + EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION)) + efa_process_comp_eqe(dev, eqe); + else + ibdev_err_ratelimited(&dev->ibdev, + "Unknown event type received %lu", + EFA_GET(&eqe->common, + EFA_ADMIN_EQE_EVENT_TYPE)); +} + +static irqreturn_t efa_intr_msix_comp(int irq, void *data) +{ + struct efa_eq *eq = data; + struct efa_com_dev *edev = eq->eeq.edev; + + efa_com_eq_comp_intr_handler(edev, &eq->eeq); + + return IRQ_HANDLED; +} + static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data) { struct efa_dev *dev = data; @@ -77,26 +118,43 @@ static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data) return IRQ_HANDLED; } -static int efa_request_mgmnt_irq(struct efa_dev *dev) +static int efa_request_irq(struct efa_dev *dev, struct efa_irq *irq) { - struct efa_irq *irq; int err; - irq = &dev->admin_irq; err = request_irq(irq->irqn, irq->handler, 0, irq->name, irq->data); if (err) { - dev_err(&dev->pdev->dev, "Failed to request admin irq (%d)\n", - err); + dev_err(&dev->pdev->dev, "Failed to request irq %s (%d)\n", + irq->name, err); return err; } - dev_dbg(&dev->pdev->dev, "Set affinity hint of mgmnt irq to %*pbl (irq vector: %d)\n", - nr_cpumask_bits, &irq->affinity_hint_mask, irq->irqn); irq_set_affinity_hint(irq->irqn, &irq->affinity_hint_mask); return 0; } +static void efa_setup_comp_irq(struct efa_dev *dev, struct efa_eq *eq, + int vector) +{ + u32 cpu; + + cpu = vector - EFA_COMP_EQS_VEC_BASE; + snprintf(eq->irq.name, EFA_IRQNAME_SIZE, "efa-comp%d@pci:%s", cpu, + pci_name(dev->pdev)); + eq->irq.handler = efa_intr_msix_comp; + eq->irq.data = eq; + eq->irq.vector = vector; + eq->irq.irqn = pci_irq_vector(dev->pdev, vector); + cpumask_set_cpu(cpu, &eq->irq.affinity_hint_mask); +} + +static void efa_free_irq(struct efa_dev *dev, struct efa_irq *irq) +{ + irq_set_affinity_hint(irq->irqn, NULL); + free_irq(irq->irqn, irq->data); +} + static void efa_setup_mgmnt_irq(struct efa_dev *dev) { u32 cpu; @@ -105,8 +163,9 @@ static void efa_setup_mgmnt_irq(struct efa_dev *dev) "efa-mgmnt@pci:%s", pci_name(dev->pdev)); dev->admin_irq.handler = efa_intr_msix_mgmnt; dev->admin_irq.data = dev; - dev->admin_irq.irqn = - pci_irq_vector(dev->pdev, dev->admin_msix_vector_idx); + dev->admin_irq.vector = dev->admin_msix_vector_idx; + dev->admin_irq.irqn = pci_irq_vector(dev->pdev, + dev->admin_msix_vector_idx); cpu = cpumask_first(cpu_online_mask); cpumask_set_cpu(cpu, &dev->admin_irq.affinity_hint_mask); @@ -115,20 +174,11 @@ static void efa_setup_mgmnt_irq(struct efa_dev *dev) dev->admin_irq.name); } -static void efa_free_mgmnt_irq(struct efa_dev *dev) -{ - struct efa_irq *irq; - - irq = &dev->admin_irq; - irq_set_affinity_hint(irq->irqn, NULL); - free_irq(irq->irqn, irq->data); -} - static int efa_set_mgmnt_irq(struct efa_dev *dev) { efa_setup_mgmnt_irq(dev); - return efa_request_mgmnt_irq(dev); + return efa_request_irq(dev, &dev->admin_irq); } static int efa_request_doorbell_bar(struct efa_dev *dev) @@ -234,6 +284,72 @@ static void efa_set_host_info(struct efa_dev *dev) dma_free_coherent(&dev->pdev->dev, bufsz, hinf, hinf_dma); } +static void efa_destroy_eq(struct efa_dev *dev, struct efa_eq *eq) +{ + efa_com_eq_destroy(&dev->edev, &eq->eeq); + efa_free_irq(dev, &eq->irq); +} + +static int efa_create_eq(struct efa_dev *dev, struct efa_eq *eq, u8 msix_vec) +{ + int err; + + efa_setup_comp_irq(dev, eq, msix_vec); + err = efa_request_irq(dev, &eq->irq); + if (err) + return err; + + err = efa_com_eq_init(&dev->edev, &eq->eeq, efa_process_eqe, + dev->dev_attr.max_eq_depth, msix_vec); + if (err) + goto err_free_comp_irq; + + return 0; + +err_free_comp_irq: + efa_free_irq(dev, &eq->irq); + return err; +} + +static int efa_create_eqs(struct efa_dev *dev) +{ + unsigned int neqs = dev->dev_attr.max_eq; + int err; + int i; + + neqs = min_t(unsigned int, neqs, num_online_cpus()); + dev->neqs = neqs; + dev->eqs = kcalloc(neqs, sizeof(*dev->eqs), GFP_KERNEL); + if (!dev->eqs) + return -ENOMEM; + + for (i = 0; i < neqs; i++) { + err = efa_create_eq(dev, &dev->eqs[i], + i + EFA_COMP_EQS_VEC_BASE); + if (err) + goto err_destroy_eqs; + } + + return 0; + +err_destroy_eqs: + for (i--; i >= 0; i--) + efa_destroy_eq(dev, &dev->eqs[i]); + kfree(dev->eqs); + + return err; +} + +static void efa_destroy_eqs(struct efa_dev *dev) +{ + int i; + + for (i = 0; i < dev->neqs; i++) + efa_destroy_eq(dev, &dev->eqs[i]); + + kfree(dev->eqs); +} + static const struct ib_device_ops efa_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_EFA, @@ -300,23 +416,29 @@ static int efa_ib_device_add(struct efa_dev *dev) if (err) goto err_release_doorbell_bar; + err = efa_create_eqs(dev); + if (err) + goto err_release_doorbell_bar; + efa_set_host_info(dev); dev->ibdev.node_type = RDMA_NODE_UNSPECIFIED; dev->ibdev.phys_port_cnt = 1; - dev->ibdev.num_comp_vectors = 1; + dev->ibdev.num_comp_vectors = dev->neqs ?: 1; dev->ibdev.dev.parent = &pdev->dev; ib_set_device_ops(&dev->ibdev, &efa_dev_ops); err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev); if (err) - goto err_release_doorbell_bar; + goto err_destroy_eqs; ibdev_info(&dev->ibdev, "IB device registered\n"); return 0; +err_destroy_eqs: + efa_destroy_eqs(dev); err_release_doorbell_bar: efa_release_doorbell_bar(dev); return err; @@ -324,9 +446,10 @@ err_release_doorbell_bar: static void efa_ib_device_remove(struct efa_dev *dev) { - efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL); ibdev_info(&dev->ibdev, "Unregister ib device\n"); ib_unregister_device(&dev->ibdev); + efa_destroy_eqs(dev); + efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL); efa_release_doorbell_bar(dev); } @@ -339,8 +462,12 @@ static int efa_enable_msix(struct efa_dev *dev) { int msix_vecs, irq_num; - /* Reserve the max msix vectors we might need */ - msix_vecs = EFA_NUM_MSIX_VEC; + /* + * Reserve the max msix vectors we might need, one vector is reserved + * for admin. + */ + msix_vecs = min_t(int, pci_msix_vec_count(dev->pdev), + num_online_cpus() + 1); dev_dbg(&dev->pdev->dev, "Trying to enable MSI-X, vectors %d\n", msix_vecs); @@ -421,6 +548,7 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev) edev->efa_dev = dev; edev->dmadev = &pdev->dev; dev->pdev = pdev; + xa_init(&dev->cqs_xa); bars = pci_select_bars(pdev, IORESOURCE_MEM) & EFA_BASE_BAR_MASK; err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); @@ -476,7 +604,7 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev) return dev; err_free_mgmnt_irq: - efa_free_mgmnt_irq(dev); + efa_free_irq(dev, &dev->admin_irq); err_disable_msix: efa_disable_msix(dev); err_reg_read_destroy: @@ -499,11 +627,12 @@ static void efa_remove_device(struct pci_dev *pdev) edev = &dev->edev; efa_com_admin_destroy(edev); - efa_free_mgmnt_irq(dev); + efa_free_irq(dev, &dev->admin_irq); efa_disable_msix(dev); efa_com_mmio_reg_read_destroy(edev); devm_iounmap(&pdev->dev, edev->reg_bar); efa_release_bars(dev, EFA_BASE_BAR_MASK); + xa_destroy(&dev->cqs_xa); ib_dealloc_device(&dev->ibdev); pci_disable_device(pdev); } diff --git a/drivers/infiniband/hw/efa/efa_regs_defs.h b/drivers/infiniband/hw/efa/efa_regs_defs.h index 4017982fe13b..714ae6258800 100644 --- a/drivers/infiniband/hw/efa/efa_regs_defs.h +++ b/drivers/infiniband/hw/efa/efa_regs_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_REGS_H_ @@ -42,6 +42,7 @@ enum efa_regs_reset_reason_types { #define EFA_REGS_MMIO_REG_READ_OFF 0x5c #define EFA_REGS_MMIO_RESP_LO_OFF 0x60 #define EFA_REGS_MMIO_RESP_HI_OFF 0x64 +#define EFA_REGS_EQ_DB_OFF 0x68 /* version register */ #define EFA_REGS_VERSION_MINOR_VERSION_MASK 0xff @@ -93,4 +94,8 @@ enum efa_regs_reset_reason_types { #define EFA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff #define EFA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000 +/* eq_db register */ +#define EFA_REGS_EQ_DB_EQN_MASK 0xffff +#define EFA_REGS_EQ_DB_ARM_MASK 0x80000000 + #endif /* _EFA_REGS_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index e5f9d90aad5e..3353ad4925ee 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -245,6 +245,9 @@ int efa_query_device(struct ib_device *ibdev, if (EFA_DEV_CAP(dev, RNR_RETRY)) resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY; + if (dev->neqs) + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS; + err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); if (err) { @@ -984,6 +987,12 @@ static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx) return efa_com_destroy_cq(&dev->edev, ¶ms); } +static void efa_cq_user_mmap_entries_remove(struct efa_cq *cq) +{ + rdma_user_mmap_entry_remove(cq->db_mmap_entry); + rdma_user_mmap_entry_remove(cq->mmap_entry); +} + int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct efa_dev *dev = to_edev(ibcq->device); @@ -993,15 +1002,25 @@ int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n", cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr); - rdma_user_mmap_entry_remove(cq->mmap_entry); + efa_cq_user_mmap_entries_remove(cq); efa_destroy_cq_idx(dev, cq->cq_idx); + if (cq->eq) { + xa_erase(&dev->cqs_xa, cq->cq_idx); + synchronize_irq(cq->eq->irq.irqn); + } efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, DMA_FROM_DEVICE); return 0; } +static struct efa_eq *efa_vec2eq(struct efa_dev *dev, int vec) +{ + return &dev->eqs[vec]; +} + static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, - struct efa_ibv_create_cq_resp *resp) + struct efa_ibv_create_cq_resp *resp, + bool db_valid) { resp->q_mmap_size = cq->size; cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, @@ -1011,6 +1030,21 @@ static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, if (!cq->mmap_entry) return -ENOMEM; + if (db_valid) { + cq->db_mmap_entry = + efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, + dev->db_bar_addr + resp->db_off, + PAGE_SIZE, EFA_MMAP_IO_NC, + &resp->db_mmap_key); + if (!cq->db_mmap_entry) { + rdma_user_mmap_entry_remove(cq->mmap_entry); + return -ENOMEM; + } + + resp->db_off &= ~PAGE_MASK; + resp->comp_mask |= EFA_CREATE_CQ_RESP_DB_OFF; + } + return 0; } @@ -1019,8 +1053,8 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, { struct efa_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct efa_ucontext, ibucontext); + struct efa_com_create_cq_params params = {}; struct efa_ibv_create_cq_resp resp = {}; - struct efa_com_create_cq_params params; struct efa_com_create_cq_result result; struct ib_device *ibdev = ibcq->device; struct efa_dev *dev = to_edev(ibdev); @@ -1065,7 +1099,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_out; } - if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_50)) { + if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_58)) { ibdev_dbg(ibdev, "Incompatible ABI params, unknown fields in udata\n"); err = -EINVAL; @@ -1101,29 +1135,45 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, params.dma_addr = cq->dma_addr; params.entry_size_in_bytes = cmd.cq_entry_size; params.num_sub_cqs = cmd.num_sub_cqs; + if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) { + cq->eq = efa_vec2eq(dev, attr->comp_vector); + params.eqn = cq->eq->eeq.eqn; + params.interrupt_mode_enabled = true; + } + err = efa_com_create_cq(&dev->edev, ¶ms, &result); if (err) goto err_free_mapped; + resp.db_off = result.db_off; resp.cq_idx = result.cq_idx; cq->cq_idx = result.cq_idx; cq->ibcq.cqe = result.actual_depth; WARN_ON_ONCE(entries != result.actual_depth); - err = cq_mmap_entries_setup(dev, cq, &resp); + err = cq_mmap_entries_setup(dev, cq, &resp, result.db_valid); if (err) { ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n", cq->cq_idx); goto err_destroy_cq; } + if (cq->eq) { + err = xa_err(xa_store(&dev->cqs_xa, cq->cq_idx, cq, GFP_KERNEL)); + if (err) { + ibdev_dbg(ibdev, "Failed to store cq[%u] in xarray\n", + cq->cq_idx); + goto err_remove_mmap; + } + } + if (udata->outlen) { err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); if (err) { ibdev_dbg(ibdev, "Failed to copy udata for create_cq\n"); - goto err_remove_mmap; + goto err_xa_erase; } } @@ -1132,8 +1182,11 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return 0; +err_xa_erase: + if (cq->eq) + xa_erase(&dev->cqs_xa, cq->cq_idx); err_remove_mmap: - rdma_user_mmap_entry_remove(cq->mmap_entry); + efa_cq_user_mmap_entries_remove(cq); err_destroy_cq: efa_destroy_cq_idx(dev, cq->cq_idx); err_free_mapped: -- cgit From 13f30b0fa0a9fa4f713edbb262f2e451886ce242 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Fri, 8 Oct 2021 15:24:29 +0300 Subject: RDMA/counter: Add a descriptor in struct rdma_hw_stats Add a counter statistic descriptor structure in rdma_hw_stats. In addition to the counter name, more meta-information will be added. This code extension is needed for optional-counter support in the following patches. Link: https://lore.kernel.org/r/20211008122439.166063-4-markzhang@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky Signed-off-by: Mark Zhang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/nldev.c | 6 +- drivers/infiniband/core/sysfs.c | 8 +- drivers/infiniband/hw/bnxt_re/hw_counters.c | 137 ++++++++++++++-------------- drivers/infiniband/hw/cxgb4/provider.c | 22 ++--- drivers/infiniband/hw/efa/efa_verbs.c | 19 ++-- drivers/infiniband/hw/hfi1/verbs.c | 53 +++++------ drivers/infiniband/hw/irdma/verbs.c | 100 ++++++++++---------- drivers/infiniband/hw/mlx4/main.c | 44 +++++---- drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 +- drivers/infiniband/hw/mlx5/counters.c | 41 ++++----- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- drivers/infiniband/sw/rxe/rxe_hw_counters.c | 42 ++++----- 12 files changed, 237 insertions(+), 239 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index e9b4b2cccaa0..3f6b98a87566 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -969,7 +969,8 @@ static int fill_stat_counter_hwcounters(struct sk_buff *msg, return -EMSGSIZE; for (i = 0; i < st->num_counters; i++) - if (rdma_nl_stat_hwcounter_entry(msg, st->names[i], st->value[i])) + if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name, + st->value[i])) goto err; nla_nest_end(msg, table_attr); @@ -2105,7 +2106,8 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, for (i = 0; i < num_cnts; i++) { v = stats->value[i] + rdma_counter_get_hwstat_value(device, port, i); - if (rdma_nl_stat_hwcounter_entry(msg, stats->names[i], v)) { + if (rdma_nl_stat_hwcounter_entry(msg, + stats->descs[i].name, v)) { ret = -EMSGSIZE; goto err_table; } diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 6146c3c1cbe5..c3663cfdcd52 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -895,7 +895,7 @@ alloc_hw_stats_device(struct ib_device *ibdev) stats = ibdev->ops.alloc_hw_device_stats(ibdev); if (!stats) return ERR_PTR(-ENOMEM); - if (!stats->names || stats->num_counters <= 0) + if (!stats->descs || stats->num_counters <= 0) goto err_free_stats; /* @@ -957,7 +957,7 @@ int ib_setup_device_attrs(struct ib_device *ibdev) for (i = 0; i < data->stats->num_counters; i++) { attr = &data->attrs[i]; sysfs_attr_init(&attr->attr.attr); - attr->attr.attr.name = data->stats->names[i]; + attr->attr.attr.name = data->stats->descs[i].name; attr->attr.attr.mode = 0444; attr->attr.show = hw_stat_device_show; attr->show = show_hw_stats; @@ -994,7 +994,7 @@ alloc_hw_stats_port(struct ib_port *port, struct attribute_group *group) stats = ibdev->ops.alloc_hw_port_stats(port->ibdev, port->port_num); if (!stats) return ERR_PTR(-ENOMEM); - if (!stats->names || stats->num_counters <= 0) + if (!stats->descs || stats->num_counters <= 0) goto err_free_stats; /* @@ -1047,7 +1047,7 @@ static int setup_hw_port_stats(struct ib_port *port, for (i = 0; i < data->stats->num_counters; i++) { attr = &data->attrs[i]; sysfs_attr_init(&attr->attr.attr); - attr->attr.attr.name = data->stats->names[i]; + attr->attr.attr.name = data->stats->descs[i].name; attr->attr.attr.mode = 0444; attr->attr.show = hw_stat_port_show; attr->show = show_hw_stats; diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 1c06c9cf234b..78ca6dfd182b 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -57,74 +57,72 @@ #include "bnxt_re.h" #include "hw_counters.h" -static const char * const bnxt_re_stat_name[] = { - [BNXT_RE_ACTIVE_PD] = "active_pds", - [BNXT_RE_ACTIVE_AH] = "active_ahs", - [BNXT_RE_ACTIVE_QP] = "active_qps", - [BNXT_RE_ACTIVE_SRQ] = "active_srqs", - [BNXT_RE_ACTIVE_CQ] = "active_cqs", - [BNXT_RE_ACTIVE_MR] = "active_mrs", - [BNXT_RE_ACTIVE_MW] = "active_mws", - [BNXT_RE_RX_PKTS] = "rx_pkts", - [BNXT_RE_RX_BYTES] = "rx_bytes", - [BNXT_RE_TX_PKTS] = "tx_pkts", - [BNXT_RE_TX_BYTES] = "tx_bytes", - [BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors", - [BNXT_RE_RX_ERRORS] = "rx_roce_errors", - [BNXT_RE_RX_DISCARDS] = "rx_roce_discards", - [BNXT_RE_TO_RETRANSMITS] = "to_retransmits", - [BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd", - [BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded", - [BNXT_RE_RNR_NAKS_RCVD] = "rnr_naks_rcvd", - [BNXT_RE_MISSING_RESP] = "missing_resp", - [BNXT_RE_UNRECOVERABLE_ERR] = "unrecoverable_err", - [BNXT_RE_BAD_RESP_ERR] = "bad_resp_err", - [BNXT_RE_LOCAL_QP_OP_ERR] = "local_qp_op_err", - [BNXT_RE_LOCAL_PROTECTION_ERR] = "local_protection_err", - [BNXT_RE_MEM_MGMT_OP_ERR] = "mem_mgmt_op_err", - [BNXT_RE_REMOTE_INVALID_REQ_ERR] = "remote_invalid_req_err", - [BNXT_RE_REMOTE_ACCESS_ERR] = "remote_access_err", - [BNXT_RE_REMOTE_OP_ERR] = "remote_op_err", - [BNXT_RE_DUP_REQ] = "dup_req", - [BNXT_RE_RES_EXCEED_MAX] = "res_exceed_max", - [BNXT_RE_RES_LENGTH_MISMATCH] = "res_length_mismatch", - [BNXT_RE_RES_EXCEEDS_WQE] = "res_exceeds_wqe", - [BNXT_RE_RES_OPCODE_ERR] = "res_opcode_err", - [BNXT_RE_RES_RX_INVALID_RKEY] = "res_rx_invalid_rkey", - [BNXT_RE_RES_RX_DOMAIN_ERR] = "res_rx_domain_err", - [BNXT_RE_RES_RX_NO_PERM] = "res_rx_no_perm", - [BNXT_RE_RES_RX_RANGE_ERR] = "res_rx_range_err", - [BNXT_RE_RES_TX_INVALID_RKEY] = "res_tx_invalid_rkey", - [BNXT_RE_RES_TX_DOMAIN_ERR] = "res_tx_domain_err", - [BNXT_RE_RES_TX_NO_PERM] = "res_tx_no_perm", - [BNXT_RE_RES_TX_RANGE_ERR] = "res_tx_range_err", - [BNXT_RE_RES_IRRQ_OFLOW] = "res_irrq_oflow", - [BNXT_RE_RES_UNSUP_OPCODE] = "res_unsup_opcode", - [BNXT_RE_RES_UNALIGNED_ATOMIC] = "res_unaligned_atomic", - [BNXT_RE_RES_REM_INV_ERR] = "res_rem_inv_err", - [BNXT_RE_RES_MEM_ERROR] = "res_mem_err", - [BNXT_RE_RES_SRQ_ERR] = "res_srq_err", - [BNXT_RE_RES_CMP_ERR] = "res_cmp_err", - [BNXT_RE_RES_INVALID_DUP_RKEY] = "res_invalid_dup_rkey", - [BNXT_RE_RES_WQE_FORMAT_ERR] = "res_wqe_format_err", - [BNXT_RE_RES_CQ_LOAD_ERR] = "res_cq_load_err", - [BNXT_RE_RES_SRQ_LOAD_ERR] = "res_srq_load_err", - [BNXT_RE_RES_TX_PCI_ERR] = "res_tx_pci_err", - [BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err", - [BNXT_RE_OUT_OF_SEQ_ERR] = "oos_drop_count", - [BNXT_RE_TX_ATOMIC_REQ] = "tx_atomic_req", - [BNXT_RE_TX_READ_REQ] = "tx_read_req", - [BNXT_RE_TX_READ_RES] = "tx_read_resp", - [BNXT_RE_TX_WRITE_REQ] = "tx_write_req", - [BNXT_RE_TX_SEND_REQ] = "tx_send_req", - [BNXT_RE_RX_ATOMIC_REQ] = "rx_atomic_req", - [BNXT_RE_RX_READ_REQ] = "rx_read_req", - [BNXT_RE_RX_READ_RESP] = "rx_read_resp", - [BNXT_RE_RX_WRITE_REQ] = "rx_write_req", - [BNXT_RE_RX_SEND_REQ] = "rx_send_req", - [BNXT_RE_RX_ROCE_GOOD_PKTS] = "rx_roce_good_pkts", - [BNXT_RE_RX_ROCE_GOOD_BYTES] = "rx_roce_good_bytes", - [BNXT_RE_OOB] = "rx_out_of_buffer" +static const struct rdma_stat_desc bnxt_re_stat_descs[] = { + [BNXT_RE_ACTIVE_QP].name = "active_qps", + [BNXT_RE_ACTIVE_SRQ].name = "active_srqs", + [BNXT_RE_ACTIVE_CQ].name = "active_cqs", + [BNXT_RE_ACTIVE_MR].name = "active_mrs", + [BNXT_RE_ACTIVE_MW].name = "active_mws", + [BNXT_RE_RX_PKTS].name = "rx_pkts", + [BNXT_RE_RX_BYTES].name = "rx_bytes", + [BNXT_RE_TX_PKTS].name = "tx_pkts", + [BNXT_RE_TX_BYTES].name = "tx_bytes", + [BNXT_RE_RECOVERABLE_ERRORS].name = "recoverable_errors", + [BNXT_RE_RX_ERRORS].name = "rx_roce_errors", + [BNXT_RE_RX_DISCARDS].name = "rx_roce_discards", + [BNXT_RE_TO_RETRANSMITS].name = "to_retransmits", + [BNXT_RE_SEQ_ERR_NAKS_RCVD].name = "seq_err_naks_rcvd", + [BNXT_RE_MAX_RETRY_EXCEEDED].name = "max_retry_exceeded", + [BNXT_RE_RNR_NAKS_RCVD].name = "rnr_naks_rcvd", + [BNXT_RE_MISSING_RESP].name = "missing_resp", + [BNXT_RE_UNRECOVERABLE_ERR].name = "unrecoverable_err", + [BNXT_RE_BAD_RESP_ERR].name = "bad_resp_err", + [BNXT_RE_LOCAL_QP_OP_ERR].name = "local_qp_op_err", + [BNXT_RE_LOCAL_PROTECTION_ERR].name = "local_protection_err", + [BNXT_RE_MEM_MGMT_OP_ERR].name = "mem_mgmt_op_err", + [BNXT_RE_REMOTE_INVALID_REQ_ERR].name = "remote_invalid_req_err", + [BNXT_RE_REMOTE_ACCESS_ERR].name = "remote_access_err", + [BNXT_RE_REMOTE_OP_ERR].name = "remote_op_err", + [BNXT_RE_DUP_REQ].name = "dup_req", + [BNXT_RE_RES_EXCEED_MAX].name = "res_exceed_max", + [BNXT_RE_RES_LENGTH_MISMATCH].name = "res_length_mismatch", + [BNXT_RE_RES_EXCEEDS_WQE].name = "res_exceeds_wqe", + [BNXT_RE_RES_OPCODE_ERR].name = "res_opcode_err", + [BNXT_RE_RES_RX_INVALID_RKEY].name = "res_rx_invalid_rkey", + [BNXT_RE_RES_RX_DOMAIN_ERR].name = "res_rx_domain_err", + [BNXT_RE_RES_RX_NO_PERM].name = "res_rx_no_perm", + [BNXT_RE_RES_RX_RANGE_ERR].name = "res_rx_range_err", + [BNXT_RE_RES_TX_INVALID_RKEY].name = "res_tx_invalid_rkey", + [BNXT_RE_RES_TX_DOMAIN_ERR].name = "res_tx_domain_err", + [BNXT_RE_RES_TX_NO_PERM].name = "res_tx_no_perm", + [BNXT_RE_RES_TX_RANGE_ERR].name = "res_tx_range_err", + [BNXT_RE_RES_IRRQ_OFLOW].name = "res_irrq_oflow", + [BNXT_RE_RES_UNSUP_OPCODE].name = "res_unsup_opcode", + [BNXT_RE_RES_UNALIGNED_ATOMIC].name = "res_unaligned_atomic", + [BNXT_RE_RES_REM_INV_ERR].name = "res_rem_inv_err", + [BNXT_RE_RES_MEM_ERROR].name = "res_mem_err", + [BNXT_RE_RES_SRQ_ERR].name = "res_srq_err", + [BNXT_RE_RES_CMP_ERR].name = "res_cmp_err", + [BNXT_RE_RES_INVALID_DUP_RKEY].name = "res_invalid_dup_rkey", + [BNXT_RE_RES_WQE_FORMAT_ERR].name = "res_wqe_format_err", + [BNXT_RE_RES_CQ_LOAD_ERR].name = "res_cq_load_err", + [BNXT_RE_RES_SRQ_LOAD_ERR].name = "res_srq_load_err", + [BNXT_RE_RES_TX_PCI_ERR].name = "res_tx_pci_err", + [BNXT_RE_RES_RX_PCI_ERR].name = "res_rx_pci_err", + [BNXT_RE_OUT_OF_SEQ_ERR].name = "oos_drop_count", + [BNXT_RE_TX_ATOMIC_REQ].name = "tx_atomic_req", + [BNXT_RE_TX_READ_REQ].name = "tx_read_req", + [BNXT_RE_TX_READ_RES].name = "tx_read_resp", + [BNXT_RE_TX_WRITE_REQ].name = "tx_write_req", + [BNXT_RE_TX_SEND_REQ].name = "tx_send_req", + [BNXT_RE_RX_ATOMIC_REQ].name = "rx_atomic_req", + [BNXT_RE_RX_READ_REQ].name = "rx_read_req", + [BNXT_RE_RX_READ_RESP].name = "rx_read_resp", + [BNXT_RE_RX_WRITE_REQ].name = "rx_write_req", + [BNXT_RE_RX_SEND_REQ].name = "rx_send_req", + [BNXT_RE_RX_ROCE_GOOD_PKTS].name = "rx_roce_good_pkts", + [BNXT_RE_RX_ROCE_GOOD_BYTES].name = "rx_roce_good_bytes", + [BNXT_RE_OOB].name = "rx_out_of_buffer" }; static void bnxt_re_copy_ext_stats(struct bnxt_re_dev *rdev, @@ -322,7 +320,6 @@ struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev, else num_counters = BNXT_RE_NUM_STD_COUNTERS; - return rdma_alloc_hw_stats_struct(bnxt_re_stat_name, - num_counters, + return rdma_alloc_hw_stats_struct(bnxt_re_stat_descs, num_counters, RDMA_HW_STATS_DEFAULT_LIFESPAN); } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index e7337662aff8..0c8fd5a85fcb 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -366,23 +366,23 @@ enum counters { NR_COUNTERS }; -static const char * const names[] = { - [IP4INSEGS] = "ip4InSegs", - [IP4OUTSEGS] = "ip4OutSegs", - [IP4RETRANSSEGS] = "ip4RetransSegs", - [IP4OUTRSTS] = "ip4OutRsts", - [IP6INSEGS] = "ip6InSegs", - [IP6OUTSEGS] = "ip6OutSegs", - [IP6RETRANSSEGS] = "ip6RetransSegs", - [IP6OUTRSTS] = "ip6OutRsts" +static const struct rdma_stat_desc cxgb4_descs[] = { + [IP4INSEGS].name = "ip4InSegs", + [IP4OUTSEGS].name = "ip4OutSegs", + [IP4RETRANSSEGS].name = "ip4RetransSegs", + [IP4OUTRSTS].name = "ip4OutRsts", + [IP6INSEGS].name = "ip6InSegs", + [IP6OUTSEGS].name = "ip6OutSegs", + [IP6RETRANSSEGS].name = "ip6RetransSegs", + [IP6OUTRSTS].name = "ip6OutRsts" }; static struct rdma_hw_stats *c4iw_alloc_device_stats(struct ib_device *ibdev) { - BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS); + BUILD_BUG_ON(ARRAY_SIZE(cxgb4_descs) != NR_COUNTERS); /* FIXME: these look like port stats */ - return rdma_alloc_hw_stats_struct(names, NR_COUNTERS, + return rdma_alloc_hw_stats_struct(cxgb4_descs, NR_COUNTERS, RDMA_HW_STATS_DEFAULT_LIFESPAN); } diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 3353ad4925ee..8335a6f44869 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -60,13 +60,14 @@ struct efa_user_mmap_entry { op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \ #define EFA_STATS_ENUM(ename, name) ename, -#define EFA_STATS_STR(ename, name) [ename] = name, +#define EFA_STATS_STR(ename, nam) \ + [ename].name = nam, enum efa_hw_device_stats { EFA_DEFINE_DEVICE_STATS(EFA_STATS_ENUM) }; -static const char *const efa_device_stats_names[] = { +static const struct rdma_stat_desc efa_device_stats_descs[] = { EFA_DEFINE_DEVICE_STATS(EFA_STATS_STR) }; @@ -74,7 +75,7 @@ enum efa_hw_port_stats { EFA_DEFINE_PORT_STATS(EFA_STATS_ENUM) }; -static const char *const efa_port_stats_names[] = { +static const struct rdma_stat_desc efa_port_stats_descs[] = { EFA_DEFINE_PORT_STATS(EFA_STATS_STR) }; @@ -1959,15 +1960,15 @@ int efa_destroy_ah(struct ib_ah *ibah, u32 flags) struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num) { - return rdma_alloc_hw_stats_struct(efa_port_stats_names, - ARRAY_SIZE(efa_port_stats_names), + return rdma_alloc_hw_stats_struct(efa_port_stats_descs, + ARRAY_SIZE(efa_port_stats_descs), RDMA_HW_STATS_DEFAULT_LIFESPAN); } struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev) { - return rdma_alloc_hw_stats_struct(efa_device_stats_names, - ARRAY_SIZE(efa_device_stats_names), + return rdma_alloc_hw_stats_struct(efa_device_stats_descs, + ARRAY_SIZE(efa_device_stats_descs), RDMA_HW_STATS_DEFAULT_LIFESPAN); } @@ -1992,7 +1993,7 @@ static int efa_fill_device_stats(struct efa_dev *dev, stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err); stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err); - return ARRAY_SIZE(efa_device_stats_names); + return ARRAY_SIZE(efa_device_stats_descs); } static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats, @@ -2041,7 +2042,7 @@ static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats, stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err; stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes; - return ARRAY_SIZE(efa_port_stats_names); + return ARRAY_SIZE(efa_port_stats_descs); } int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 26bea51869bf..ed9fa0d84e9e 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1602,8 +1602,8 @@ static const char * const driver_cntr_names[] = { }; static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names bufers */ -static const char **dev_cntr_names; -static const char **port_cntr_names; +static struct rdma_stat_desc *dev_cntr_descs; +static struct rdma_stat_desc *port_cntr_descs; int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names); static int num_dev_cntrs; static int num_port_cntrs; @@ -1614,13 +1614,12 @@ static int cntr_names_initialized; * strings. Optionally some entries can be reserved in the array to hold extra * external strings. */ -static int init_cntr_names(const char *names_in, - const size_t names_len, - int num_extra_names, - int *num_cntrs, - const char ***cntr_names) +static int init_cntr_names(const char *names_in, const size_t names_len, + int num_extra_names, int *num_cntrs, + struct rdma_stat_desc **cntr_descs) { - char *names_out, *p, **q; + struct rdma_stat_desc *q; + char *names_out, *p; int i, n; n = 0; @@ -1628,26 +1627,28 @@ static int init_cntr_names(const char *names_in, if (names_in[i] == '\n') n++; - names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len, - GFP_KERNEL); + names_out = + kmalloc((n + num_extra_names) * sizeof(struct rdma_stat_desc) + + names_len, + GFP_KERNEL); if (!names_out) { *num_cntrs = 0; - *cntr_names = NULL; + *cntr_descs = NULL; return -ENOMEM; } - p = names_out + (n + num_extra_names) * sizeof(char *); + p = names_out + (n + num_extra_names) * sizeof(struct rdma_stat_desc); memcpy(p, names_in, names_len); - q = (char **)names_out; + q = (struct rdma_stat_desc *)names_out; for (i = 0; i < n; i++) { - q[i] = p; + q[i].name = p; p = strchr(p, '\n'); *p++ = '\0'; } *num_cntrs = n; - *cntr_names = (const char **)names_out; + *cntr_descs = (struct rdma_stat_desc *)names_out; return 0; } @@ -1661,18 +1662,18 @@ static int init_counters(struct ib_device *ibdev) goto out_unlock; err = init_cntr_names(dd->cntrnames, dd->cntrnameslen, num_driver_cntrs, - &num_dev_cntrs, &dev_cntr_names); + &num_dev_cntrs, &dev_cntr_descs); if (err) goto out_unlock; for (i = 0; i < num_driver_cntrs; i++) - dev_cntr_names[num_dev_cntrs + i] = driver_cntr_names[i]; + dev_cntr_descs[num_dev_cntrs + i].name = driver_cntr_names[i]; err = init_cntr_names(dd->portcntrnames, dd->portcntrnameslen, 0, - &num_port_cntrs, &port_cntr_names); + &num_port_cntrs, &port_cntr_descs); if (err) { - kfree(dev_cntr_names); - dev_cntr_names = NULL; + kfree(dev_cntr_descs); + dev_cntr_descs = NULL; goto out_unlock; } cntr_names_initialized = 1; @@ -1686,7 +1687,7 @@ static struct rdma_hw_stats *hfi1_alloc_hw_device_stats(struct ib_device *ibdev) { if (init_counters(ibdev)) return NULL; - return rdma_alloc_hw_stats_struct(dev_cntr_names, + return rdma_alloc_hw_stats_struct(dev_cntr_descs, num_dev_cntrs + num_driver_cntrs, RDMA_HW_STATS_DEFAULT_LIFESPAN); } @@ -1696,7 +1697,7 @@ static struct rdma_hw_stats *hfi_alloc_hw_port_stats(struct ib_device *ibdev, { if (init_counters(ibdev)) return NULL; - return rdma_alloc_hw_stats_struct(port_cntr_names, num_port_cntrs, + return rdma_alloc_hw_stats_struct(port_cntr_descs, num_port_cntrs, RDMA_HW_STATS_DEFAULT_LIFESPAN); } @@ -1921,10 +1922,10 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd) verbs_txreq_exit(dev); mutex_lock(&cntr_names_lock); - kfree(dev_cntr_names); - kfree(port_cntr_names); - dev_cntr_names = NULL; - port_cntr_names = NULL; + kfree(dev_cntr_descs); + kfree(port_cntr_descs); + dev_cntr_descs = NULL; + port_cntr_descs = NULL; cntr_names_initialized = 0; mutex_unlock(&cntr_names_lock); } diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 7110ebf834f9..6a544c253603 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -3647,89 +3647,89 @@ static int irdma_iw_port_immutable(struct ib_device *ibdev, u32 port_num, return 0; } -static const char *const irdma_hw_stat_names[] = { +static const struct rdma_stat_desc irdma_hw_stat_descs[] = { /* 32bit names */ - [IRDMA_HW_STAT_INDEX_RXVLANERR] = "rxVlanErrors", - [IRDMA_HW_STAT_INDEX_IP4RXDISCARD] = "ip4InDiscards", - [IRDMA_HW_STAT_INDEX_IP4RXTRUNC] = "ip4InTruncatedPkts", - [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE] = "ip4OutNoRoutes", - [IRDMA_HW_STAT_INDEX_IP6RXDISCARD] = "ip6InDiscards", - [IRDMA_HW_STAT_INDEX_IP6RXTRUNC] = "ip6InTruncatedPkts", - [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE] = "ip6OutNoRoutes", - [IRDMA_HW_STAT_INDEX_TCPRTXSEG] = "tcpRetransSegs", - [IRDMA_HW_STAT_INDEX_TCPRXOPTERR] = "tcpInOptErrors", - [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR] = "tcpInProtoErrors", - [IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED] = "cnpHandled", - [IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED] = "cnpIgnored", - [IRDMA_HW_STAT_INDEX_TXNPCNPSENT] = "cnpSent", + [IRDMA_HW_STAT_INDEX_RXVLANERR].name = "rxVlanErrors", + [IRDMA_HW_STAT_INDEX_IP4RXDISCARD].name = "ip4InDiscards", + [IRDMA_HW_STAT_INDEX_IP4RXTRUNC].name = "ip4InTruncatedPkts", + [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE].name = "ip4OutNoRoutes", + [IRDMA_HW_STAT_INDEX_IP6RXDISCARD].name = "ip6InDiscards", + [IRDMA_HW_STAT_INDEX_IP6RXTRUNC].name = "ip6InTruncatedPkts", + [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE].name = "ip6OutNoRoutes", + [IRDMA_HW_STAT_INDEX_TCPRTXSEG].name = "tcpRetransSegs", + [IRDMA_HW_STAT_INDEX_TCPRXOPTERR].name = "tcpInOptErrors", + [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR].name = "tcpInProtoErrors", + [IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED].name = "cnpHandled", + [IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED].name = "cnpIgnored", + [IRDMA_HW_STAT_INDEX_TXNPCNPSENT].name = "cnpSent", /* 64bit names */ - [IRDMA_HW_STAT_INDEX_IP4RXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4RXOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4InOctets", - [IRDMA_HW_STAT_INDEX_IP4RXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4RXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4InPkts", - [IRDMA_HW_STAT_INDEX_IP4RXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4RXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4InReasmRqd", - [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4InMcastOctets", - [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4InMcastPkts", - [IRDMA_HW_STAT_INDEX_IP4TXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4TXOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4OutOctets", - [IRDMA_HW_STAT_INDEX_IP4TXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4TXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4OutPkts", - [IRDMA_HW_STAT_INDEX_IP4TXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4TXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4OutSegRqd", - [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4OutMcastOctets", - [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4OutMcastPkts", - [IRDMA_HW_STAT_INDEX_IP6RXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6RXOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6InOctets", - [IRDMA_HW_STAT_INDEX_IP6RXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6RXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6InPkts", - [IRDMA_HW_STAT_INDEX_IP6RXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6RXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6InReasmRqd", - [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6InMcastOctets", - [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6InMcastPkts", - [IRDMA_HW_STAT_INDEX_IP6TXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6TXOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6OutOctets", - [IRDMA_HW_STAT_INDEX_IP6TXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6TXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6OutPkts", - [IRDMA_HW_STAT_INDEX_IP6TXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6TXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6OutSegRqd", - [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6OutMcastOctets", - [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6OutMcastPkts", - [IRDMA_HW_STAT_INDEX_TCPRXSEGS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_TCPRXSEGS + IRDMA_HW_STAT_INDEX_MAX_32].name = "tcpInSegs", - [IRDMA_HW_STAT_INDEX_TCPTXSEG + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_TCPTXSEG + IRDMA_HW_STAT_INDEX_MAX_32].name = "tcpOutSegs", - [IRDMA_HW_STAT_INDEX_RDMARXRDS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_RDMARXRDS + IRDMA_HW_STAT_INDEX_MAX_32].name = "iwInRdmaReads", - [IRDMA_HW_STAT_INDEX_RDMARXSNDS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_RDMARXSNDS + IRDMA_HW_STAT_INDEX_MAX_32].name = "iwInRdmaSends", - [IRDMA_HW_STAT_INDEX_RDMARXWRS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_RDMARXWRS + IRDMA_HW_STAT_INDEX_MAX_32].name = "iwInRdmaWrites", - [IRDMA_HW_STAT_INDEX_RDMATXRDS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_RDMATXRDS + IRDMA_HW_STAT_INDEX_MAX_32].name = "iwOutRdmaReads", - [IRDMA_HW_STAT_INDEX_RDMATXSNDS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_RDMATXSNDS + IRDMA_HW_STAT_INDEX_MAX_32].name = "iwOutRdmaSends", - [IRDMA_HW_STAT_INDEX_RDMATXWRS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_RDMATXWRS + IRDMA_HW_STAT_INDEX_MAX_32].name = "iwOutRdmaWrites", - [IRDMA_HW_STAT_INDEX_RDMAVBND + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_RDMAVBND + IRDMA_HW_STAT_INDEX_MAX_32].name = "iwRdmaBnd", - [IRDMA_HW_STAT_INDEX_RDMAVINV + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_RDMAVINV + IRDMA_HW_STAT_INDEX_MAX_32].name = "iwRdmaInv", - [IRDMA_HW_STAT_INDEX_UDPRXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_UDPRXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "RxUDP", - [IRDMA_HW_STAT_INDEX_UDPTXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_UDPTXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "TxUDP", - [IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = - "RxECNMrkd", + [IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS + IRDMA_HW_STAT_INDEX_MAX_32] + .name = "RxECNMrkd", }; static void irdma_get_dev_fw_str(struct ib_device *dev, char *str) @@ -3753,10 +3753,10 @@ static struct rdma_hw_stats *irdma_alloc_hw_port_stats(struct ib_device *ibdev, IRDMA_HW_STAT_INDEX_MAX_64; unsigned long lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN; - BUILD_BUG_ON(ARRAY_SIZE(irdma_hw_stat_names) != + BUILD_BUG_ON(ARRAY_SIZE(irdma_hw_stat_descs) != (IRDMA_HW_STAT_INDEX_MAX_32 + IRDMA_HW_STAT_INDEX_MAX_64)); - return rdma_alloc_hw_stats_struct(irdma_hw_stat_names, num_counters, + return rdma_alloc_hw_stats_struct(irdma_hw_stat_descs, num_counters, lifespan); } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index f367f4a4abff..38742e233915 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2105,10 +2105,10 @@ mlx4_ib_alloc_hw_device_stats(struct ib_device *ibdev) struct mlx4_ib_dev *dev = to_mdev(ibdev); struct mlx4_ib_diag_counters *diag = dev->diag_counters; - if (!diag[0].name) + if (!diag[0].descs) return NULL; - return rdma_alloc_hw_stats_struct(diag[0].name, diag[0].num_counters, + return rdma_alloc_hw_stats_struct(diag[0].descs, diag[0].num_counters, RDMA_HW_STATS_DEFAULT_LIFESPAN); } @@ -2118,10 +2118,10 @@ mlx4_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num) struct mlx4_ib_dev *dev = to_mdev(ibdev); struct mlx4_ib_diag_counters *diag = dev->diag_counters; - if (!diag[1].name) + if (!diag[1].descs) return NULL; - return rdma_alloc_hw_stats_struct(diag[1].name, diag[1].num_counters, + return rdma_alloc_hw_stats_struct(diag[1].descs, diag[1].num_counters, RDMA_HW_STATS_DEFAULT_LIFESPAN); } @@ -2151,10 +2151,8 @@ static int mlx4_ib_get_hw_stats(struct ib_device *ibdev, } static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev, - const char ***name, - u32 **offset, - u32 *num, - bool port) + struct rdma_stat_desc **pdescs, + u32 **offset, u32 *num, bool port) { u32 num_counters; @@ -2166,46 +2164,46 @@ static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev, if (!port) num_counters += ARRAY_SIZE(diag_device_only); - *name = kcalloc(num_counters, sizeof(**name), GFP_KERNEL); - if (!*name) + *pdescs = kcalloc(num_counters, sizeof(struct rdma_stat_desc), + GFP_KERNEL); + if (!*pdescs) return -ENOMEM; *offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL); if (!*offset) - goto err_name; + goto err; *num = num_counters; return 0; -err_name: - kfree(*name); +err: + kfree(*pdescs); return -ENOMEM; } static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev, - const char **name, - u32 *offset, - bool port) + struct rdma_stat_desc *descs, + u32 *offset, bool port) { int i; int j; for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) { - name[i] = diag_basic[i].name; + descs[i].name = diag_basic[i].name; offset[i] = diag_basic[i].offset; } if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) { for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) { - name[j] = diag_ext[i].name; + descs[j].name = diag_ext[i].name; offset[j] = diag_ext[i].offset; } } if (!port) { for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) { - name[j] = diag_device_only[i].name; + descs[j].name = diag_device_only[i].name; offset[j] = diag_device_only[i].offset; } } @@ -2233,13 +2231,13 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) if (i && !per_port) continue; - ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].name, + ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].descs, &diag[i].offset, &diag[i].num_counters, i); if (ret) goto err_alloc; - mlx4_ib_fill_diag_counters(ibdev, diag[i].name, + mlx4_ib_fill_diag_counters(ibdev, diag[i].descs, diag[i].offset, i); } @@ -2249,7 +2247,7 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) err_alloc: if (i) { - kfree(diag[i - 1].name); + kfree(diag[i - 1].descs); kfree(diag[i - 1].offset); } @@ -2262,7 +2260,7 @@ static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev) for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) { kfree(ibdev->diag_counters[i].offset); - kfree(ibdev->diag_counters[i].name); + kfree(ibdev->diag_counters[i].descs); } } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index c60f6e9ac640..d84023b4b1b8 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -601,7 +601,7 @@ struct mlx4_ib_counters { #define MLX4_DIAG_COUNTERS_TYPES 2 struct mlx4_ib_diag_counters { - const char **name; + struct rdma_stat_desc *descs; u32 *offset; u32 num_counters; }; diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 224ba36f2946..caa35ea14b48 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -167,7 +167,7 @@ mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev) struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct mlx5_ib_counters *cnts = &dev->port[0].cnts; - return rdma_alloc_hw_stats_struct(cnts->names, + return rdma_alloc_hw_stats_struct(cnts->descs, cnts->num_q_counters + cnts->num_cong_counters + cnts->num_ext_ppcnt_counters, @@ -180,7 +180,7 @@ mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num) struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts; - return rdma_alloc_hw_stats_struct(cnts->names, + return rdma_alloc_hw_stats_struct(cnts->descs, cnts->num_q_counters + cnts->num_cong_counters + cnts->num_ext_ppcnt_counters, @@ -302,7 +302,7 @@ mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port - 1); - return rdma_alloc_hw_stats_struct(cnts->names, + return rdma_alloc_hw_stats_struct(cnts->descs, cnts->num_q_counters + cnts->num_cong_counters + cnts->num_ext_ppcnt_counters, @@ -371,57 +371,55 @@ static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp) return mlx5_ib_qp_set_counter(qp, NULL); } - static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, - const char **names, - size_t *offsets) + struct rdma_stat_desc *descs, size_t *offsets) { int i; int j = 0; for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) { - names[j] = basic_q_cnts[i].name; + descs[j].name = basic_q_cnts[i].name; offsets[j] = basic_q_cnts[i].offset; } if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) { - names[j] = out_of_seq_q_cnts[i].name; + descs[j].name = out_of_seq_q_cnts[i].name; offsets[j] = out_of_seq_q_cnts[i].offset; } } if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) { - names[j] = retrans_q_cnts[i].name; + descs[j].name = retrans_q_cnts[i].name; offsets[j] = retrans_q_cnts[i].offset; } } if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { - names[j] = extended_err_cnts[i].name; + descs[j].name = extended_err_cnts[i].name; offsets[j] = extended_err_cnts[i].offset; } } if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { - names[j] = roce_accl_cnts[i].name; + descs[j].name = roce_accl_cnts[i].name; offsets[j] = roce_accl_cnts[i].offset; } } if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { - names[j] = cong_cnts[i].name; + descs[j].name = cong_cnts[i].name; offsets[j] = cong_cnts[i].offset; } } if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) { - names[j] = ext_ppcnt_cnts[i].name; + descs[j].name = ext_ppcnt_cnts[i].name; offsets[j] = ext_ppcnt_cnts[i].offset; } } @@ -457,20 +455,21 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); num_counters += ARRAY_SIZE(ext_ppcnt_cnts); } - cnts->names = kcalloc(num_counters, sizeof(*cnts->names), GFP_KERNEL); - if (!cnts->names) + cnts->descs = kcalloc(num_counters, + sizeof(struct rdma_stat_desc), GFP_KERNEL); + if (!cnts->descs) return -ENOMEM; cnts->offsets = kcalloc(num_counters, sizeof(*cnts->offsets), GFP_KERNEL); if (!cnts->offsets) - goto err_names; + goto err; return 0; -err_names: - kfree(cnts->names); - cnts->names = NULL; +err: + kfree(cnts->descs); + cnts->descs = NULL; return -ENOMEM; } @@ -491,7 +490,7 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) dev->port[i].cnts.set_id); mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); } - kfree(dev->port[i].cnts.names); + kfree(dev->port[i].cnts.descs); kfree(dev->port[i].cnts.offsets); } } @@ -514,7 +513,7 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) if (err) goto err_alloc; - mlx5_ib_fill_counters(dev, dev->port[i].cnts.names, + mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs, dev->port[i].cnts.offsets); MLX5_SET(alloc_q_counter_in, in, uid, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index bf20a388eabe..6f5451d96dd7 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -798,7 +798,7 @@ struct mlx5_ib_resources { }; struct mlx5_ib_counters { - const char **names; + struct rdma_stat_desc *descs; size_t *offsets; u32 num_q_counters; u32 num_cong_counters; diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c index d5ceb706d964..a012522b577a 100644 --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c @@ -6,22 +6,22 @@ #include "rxe.h" #include "rxe_hw_counters.h" -static const char * const rxe_counter_name[] = { - [RXE_CNT_SENT_PKTS] = "sent_pkts", - [RXE_CNT_RCVD_PKTS] = "rcvd_pkts", - [RXE_CNT_DUP_REQ] = "duplicate_request", - [RXE_CNT_OUT_OF_SEQ_REQ] = "out_of_seq_request", - [RXE_CNT_RCV_RNR] = "rcvd_rnr_err", - [RXE_CNT_SND_RNR] = "send_rnr_err", - [RXE_CNT_RCV_SEQ_ERR] = "rcvd_seq_err", - [RXE_CNT_COMPLETER_SCHED] = "ack_deferred", - [RXE_CNT_RETRY_EXCEEDED] = "retry_exceeded_err", - [RXE_CNT_RNR_RETRY_EXCEEDED] = "retry_rnr_exceeded_err", - [RXE_CNT_COMP_RETRY] = "completer_retry_err", - [RXE_CNT_SEND_ERR] = "send_err", - [RXE_CNT_LINK_DOWNED] = "link_downed", - [RXE_CNT_RDMA_SEND] = "rdma_sends", - [RXE_CNT_RDMA_RECV] = "rdma_recvs", +static const struct rdma_stat_desc rxe_counter_descs[] = { + [RXE_CNT_SENT_PKTS].name = "sent_pkts", + [RXE_CNT_RCVD_PKTS].name = "rcvd_pkts", + [RXE_CNT_DUP_REQ].name = "duplicate_request", + [RXE_CNT_OUT_OF_SEQ_REQ].name = "out_of_seq_request", + [RXE_CNT_RCV_RNR].name = "rcvd_rnr_err", + [RXE_CNT_SND_RNR].name = "send_rnr_err", + [RXE_CNT_RCV_SEQ_ERR].name = "rcvd_seq_err", + [RXE_CNT_COMPLETER_SCHED].name = "ack_deferred", + [RXE_CNT_RETRY_EXCEEDED].name = "retry_exceeded_err", + [RXE_CNT_RNR_RETRY_EXCEEDED].name = "retry_rnr_exceeded_err", + [RXE_CNT_COMP_RETRY].name = "completer_retry_err", + [RXE_CNT_SEND_ERR].name = "send_err", + [RXE_CNT_LINK_DOWNED].name = "link_downed", + [RXE_CNT_RDMA_SEND].name = "rdma_sends", + [RXE_CNT_RDMA_RECV].name = "rdma_recvs", }; int rxe_ib_get_hw_stats(struct ib_device *ibdev, @@ -34,18 +34,18 @@ int rxe_ib_get_hw_stats(struct ib_device *ibdev, if (!port || !stats) return -EINVAL; - for (cnt = 0; cnt < ARRAY_SIZE(rxe_counter_name); cnt++) + for (cnt = 0; cnt < ARRAY_SIZE(rxe_counter_descs); cnt++) stats->value[cnt] = atomic64_read(&dev->stats_counters[cnt]); - return ARRAY_SIZE(rxe_counter_name); + return ARRAY_SIZE(rxe_counter_descs); } struct rdma_hw_stats *rxe_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num) { - BUILD_BUG_ON(ARRAY_SIZE(rxe_counter_name) != RXE_NUM_OF_COUNTERS); + BUILD_BUG_ON(ARRAY_SIZE(rxe_counter_descs) != RXE_NUM_OF_COUNTERS); - return rdma_alloc_hw_stats_struct(rxe_counter_name, - ARRAY_SIZE(rxe_counter_name), + return rdma_alloc_hw_stats_struct(rxe_counter_descs, + ARRAY_SIZE(rxe_counter_descs), RDMA_HW_STATS_DEFAULT_LIFESPAN); } -- cgit From 0a0800ce2a6a4d58d73f712f1c0780974877babf Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Fri, 8 Oct 2021 15:24:30 +0300 Subject: RDMA/core: Add a helper API rdma_free_hw_stats_struct Add a new API rdma_free_hw_stats_struct to pair with rdma_alloc_hw_stats_struct (which is also de-inlined). This will be useful when there are more alloc/free works in following patches. Link: https://lore.kernel.org/r/20211008122439.166063-5-markzhang@nvidia.com Signed-off-by: Leon Romanovsky Signed-off-by: Mark Zhang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/counters.c | 8 ++++---- drivers/infiniband/core/sysfs.c | 8 ++++---- drivers/infiniband/core/verbs.c | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index df9e6c5e4ddf..331cd29f0d61 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -165,7 +165,7 @@ static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port, return counter; err_mode: - kfree(counter->stats); + rdma_free_hw_stats_struct(counter->stats); err_stats: rdma_restrack_put(&counter->res); kfree(counter); @@ -186,7 +186,7 @@ static void rdma_counter_free(struct rdma_counter *counter) mutex_unlock(&port_counter->lock); rdma_restrack_del(&counter->res); - kfree(counter->stats); + rdma_free_hw_stats_struct(counter->stats); kfree(counter); } @@ -618,7 +618,7 @@ void rdma_counter_init(struct ib_device *dev) fail: for (i = port; i >= rdma_start_port(dev); i--) { port_counter = &dev->port_data[port].port_counter; - kfree(port_counter->hstats); + rdma_free_hw_stats_struct(port_counter->hstats); port_counter->hstats = NULL; mutex_destroy(&port_counter->lock); } @@ -631,7 +631,7 @@ void rdma_counter_release(struct ib_device *dev) rdma_for_each_port(dev, port) { port_counter = &dev->port_data[port].port_counter; - kfree(port_counter->hstats); + rdma_free_hw_stats_struct(port_counter->hstats); mutex_destroy(&port_counter->lock); } } diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index c3663cfdcd52..8d831d4fd2ad 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -755,7 +755,7 @@ static void ib_port_release(struct kobject *kobj) for (i = 0; i != ARRAY_SIZE(port->groups); i++) kfree(port->groups[i].attrs); if (port->hw_stats_data) - kfree(port->hw_stats_data->stats); + rdma_free_hw_stats_struct(port->hw_stats_data->stats); kfree(port->hw_stats_data); kfree(port); } @@ -919,14 +919,14 @@ alloc_hw_stats_device(struct ib_device *ibdev) err_free_data: kfree(data); err_free_stats: - kfree(stats); + rdma_free_hw_stats_struct(stats); return ERR_PTR(-ENOMEM); } void ib_device_release_hw_stats(struct hw_stats_device_data *data) { kfree(data->group.attrs); - kfree(data->stats); + rdma_free_hw_stats_struct(data->stats); kfree(data); } @@ -1018,7 +1018,7 @@ alloc_hw_stats_port(struct ib_port *port, struct attribute_group *group) err_free_data: kfree(data); err_free_stats: - kfree(stats); + rdma_free_hw_stats_struct(stats); return ERR_PTR(-ENOMEM); } diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 89a2b21976d6..8e72290d6b38 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2976,3 +2976,38 @@ bool __rdma_block_iter_next(struct ib_block_iter *biter) return true; } EXPORT_SYMBOL(__rdma_block_iter_next); + +/** + * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct + * for the drivers. + * @descs: array of static descriptors + * @num_counters: number of elements in array + * @lifespan: milliseconds between updates + */ +struct rdma_hw_stats *rdma_alloc_hw_stats_struct( + const struct rdma_stat_desc *descs, int num_counters, + unsigned long lifespan) +{ + struct rdma_hw_stats *stats; + + stats = kzalloc(struct_size(stats, value, num_counters), GFP_KERNEL); + if (!stats) + return NULL; + + stats->descs = descs; + stats->num_counters = num_counters; + stats->lifespan = msecs_to_jiffies(lifespan); + + return stats; +} +EXPORT_SYMBOL(rdma_alloc_hw_stats_struct); + +/** + * rdma_free_hw_stats_struct - Helper function to release rdma_hw_stats + * @stats: statistics to release + */ +void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats) +{ + kfree(stats); +} +EXPORT_SYMBOL(rdma_free_hw_stats_struct); -- cgit From 0dc89684605e8b60af645988d3e0d80a57b6e937 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Fri, 8 Oct 2021 15:24:31 +0300 Subject: RDMA/counter: Add an is_disabled field in struct rdma_hw_stats Add a bitmap in rdma_hw_stat structure, with each bit indicates whether the corresponding counter is currently disabled or not. By default hwcounters are enabled. Link: https://lore.kernel.org/r/20211008122439.166063-6-markzhang@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky Signed-off-by: Mark Zhang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/nldev.c | 11 ++++++++++- drivers/infiniband/core/verbs.c | 13 +++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 3f6b98a87566..67519730b1ac 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -968,15 +968,21 @@ static int fill_stat_counter_hwcounters(struct sk_buff *msg, if (!table_attr) return -EMSGSIZE; - for (i = 0; i < st->num_counters; i++) + mutex_lock(&st->lock); + for (i = 0; i < st->num_counters; i++) { + if (test_bit(i, st->is_disabled)) + continue; if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name, st->value[i])) goto err; + } + mutex_unlock(&st->lock); nla_nest_end(msg, table_attr); return 0; err: + mutex_unlock(&st->lock); nla_nest_cancel(msg, table_attr); return -EMSGSIZE; } @@ -2104,6 +2110,9 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, goto err_stats; } for (i = 0; i < num_cnts; i++) { + if (test_bit(i, stats->is_disabled)) + continue; + v = stats->value[i] + rdma_counter_get_hwstat_value(device, port, i); if (rdma_nl_stat_hwcounter_entry(msg, diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 8e72290d6b38..47cf273d0678 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2994,11 +2994,20 @@ struct rdma_hw_stats *rdma_alloc_hw_stats_struct( if (!stats) return NULL; + stats->is_disabled = kcalloc(BITS_TO_LONGS(num_counters), + sizeof(*stats->is_disabled), GFP_KERNEL); + if (!stats->is_disabled) + goto err; + stats->descs = descs; stats->num_counters = num_counters; stats->lifespan = msecs_to_jiffies(lifespan); return stats; + +err: + kfree(stats); + return NULL; } EXPORT_SYMBOL(rdma_alloc_hw_stats_struct); @@ -3008,6 +3017,10 @@ EXPORT_SYMBOL(rdma_alloc_hw_stats_struct); */ void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats) { + if (!stats) + return; + + kfree(stats->is_disabled); kfree(stats); } EXPORT_SYMBOL(rdma_free_hw_stats_struct); -- cgit From 5e2ddd1e598273d77f653c70df96fc417a2d0a85 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Fri, 8 Oct 2021 15:24:32 +0300 Subject: RDMA/counter: Add optional counter support An optional counter is a driver-specific counter that may be dynamically enabled/disabled. This enhancement allows drivers to expose counters which are, for example, mutually exclusive and cannot be enabled at the same time, counters that might degrades performance, optional debug counters, etc. Optional counters are marked with IB_STAT_FLAG_OPTIONAL flag. They are not exported in sysfs, and must be at the end of all stats, otherwise the attr->show() in sysfs would get wrong indexes for hwcounters that are behind optional counters. Link: https://lore.kernel.org/r/20211008122439.166063-7-markzhang@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Neta Ostrovsky Signed-off-by: Leon Romanovsky Signed-off-by: Mark Zhang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/counters.c | 32 ++++++++++++++++++++++++++++++++ drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/sysfs.c | 36 ++++++++++++++++++++++++++---------- 3 files changed, 59 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 331cd29f0d61..af59486fe418 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -106,6 +106,38 @@ static int __rdma_counter_bind_qp(struct rdma_counter *counter, return ret; } +int rdma_counter_modify(struct ib_device *dev, u32 port, + unsigned int index, bool enable) +{ + struct rdma_hw_stats *stats; + int ret = 0; + + if (!dev->ops.modify_hw_stat) + return -EOPNOTSUPP; + + stats = ib_get_hw_stats_port(dev, port); + if (!stats || index >= stats->num_counters || + !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) + return -EINVAL; + + mutex_lock(&stats->lock); + + if (enable != test_bit(index, stats->is_disabled)) + goto out; + + ret = dev->ops.modify_hw_stat(dev, port, index, enable); + if (ret) + goto out; + + if (enable) + clear_bit(index, stats->is_disabled); + else + set_bit(index, stats->is_disabled); +out: + mutex_unlock(&stats->lock); + return ret; +} + static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port, struct ib_qp *qp, enum rdma_nl_counter_mode mode) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index f4814bb7f082..22a4adda7981 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2676,6 +2676,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, modify_cq); SET_DEVICE_OP(dev_ops, modify_device); SET_DEVICE_OP(dev_ops, modify_flow_action_esp); + SET_DEVICE_OP(dev_ops, modify_hw_stat); SET_DEVICE_OP(dev_ops, modify_port); SET_DEVICE_OP(dev_ops, modify_qp); SET_DEVICE_OP(dev_ops, modify_srq); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 8d831d4fd2ad..1bf3aea4b71e 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -934,7 +934,8 @@ int ib_setup_device_attrs(struct ib_device *ibdev) { struct hw_stats_device_attribute *attr; struct hw_stats_device_data *data; - int i, ret; + bool opstat_skipped = false; + int i, ret, pos = 0; data = alloc_hw_stats_device(ibdev); if (IS_ERR(data)) { @@ -955,16 +956,23 @@ int ib_setup_device_attrs(struct ib_device *ibdev) data->stats->timestamp = jiffies; for (i = 0; i < data->stats->num_counters; i++) { - attr = &data->attrs[i]; + if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) { + opstat_skipped = true; + continue; + } + + WARN_ON(opstat_skipped); + attr = &data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); attr->attr.attr.name = data->stats->descs[i].name; attr->attr.attr.mode = 0444; attr->attr.show = hw_stat_device_show; attr->show = show_hw_stats; - data->group.attrs[i] = &attr->attr.attr; + data->group.attrs[pos] = &attr->attr.attr; + pos++; } - attr = &data->attrs[i]; + attr = &data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); attr->attr.attr.name = "lifespan"; attr->attr.attr.mode = 0644; @@ -972,7 +980,7 @@ int ib_setup_device_attrs(struct ib_device *ibdev) attr->show = show_stats_lifespan; attr->attr.store = hw_stat_device_store; attr->store = set_stats_lifespan; - data->group.attrs[i] = &attr->attr.attr; + data->group.attrs[pos] = &attr->attr.attr; for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++) if (!ibdev->groups[i]) { ibdev->groups[i] = &data->group; @@ -1027,7 +1035,8 @@ static int setup_hw_port_stats(struct ib_port *port, { struct hw_stats_port_attribute *attr; struct hw_stats_port_data *data; - int i, ret; + bool opstat_skipped = false; + int i, ret, pos = 0; data = alloc_hw_stats_port(port, group); if (IS_ERR(data)) @@ -1045,16 +1054,23 @@ static int setup_hw_port_stats(struct ib_port *port, data->stats->timestamp = jiffies; for (i = 0; i < data->stats->num_counters; i++) { - attr = &data->attrs[i]; + if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) { + opstat_skipped = true; + continue; + } + + WARN_ON(opstat_skipped); + attr = &data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); attr->attr.attr.name = data->stats->descs[i].name; attr->attr.attr.mode = 0444; attr->attr.show = hw_stat_port_show; attr->show = show_hw_stats; - group->attrs[i] = &attr->attr.attr; + group->attrs[pos] = &attr->attr.attr; + pos++; } - attr = &data->attrs[i]; + attr = &data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); attr->attr.attr.name = "lifespan"; attr->attr.attr.mode = 0644; @@ -1062,7 +1078,7 @@ static int setup_hw_port_stats(struct ib_port *port, attr->show = show_stats_lifespan; attr->attr.store = hw_stat_port_store; attr->store = set_stats_lifespan; - group->attrs[i] = &attr->attr.attr; + group->attrs[pos] = &attr->attr.attr; port->hw_stats_data = data; return 0; -- cgit From 7301d0a9834c7f1f0c91c1f0a46c7b191b1fd0da Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Fri, 8 Oct 2021 15:24:33 +0300 Subject: RDMA/nldev: Add support to get status of all counters This patch adds the ability to get the name, index and status of all counters for each link through RDMA netlink. This can be used for user-space to get the current optional-counter mode. Examples: $ rdma statistic mode link rocep8s0f0/1 optional-counters cc_rx_ce_pkts $ rdma statistic mode supported link rocep8s0f0/1 supported optional-counters cc_rx_ce_pkts,cc_rx_cnp_pkts,cc_tx_cnp_pkts link rocep8s0f1/1 supported optional-counters cc_rx_ce_pkts,cc_rx_cnp_pkts,cc_tx_cnp_pkts Link: https://lore.kernel.org/r/20211008122439.166063-8-markzhang@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Neta Ostrovsky Signed-off-by: Leon Romanovsky Signed-off-by: Mark Zhang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/nldev.c | 98 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 67519730b1ac..210057fef7bd 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -154,6 +154,8 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 }, [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 }, [RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 }, }; static int put_driver_name_print_type(struct sk_buff *msg, const char *name, @@ -2264,6 +2266,99 @@ static int nldev_stat_get_dumpit(struct sk_buff *skb, return ret; } +static int nldev_stat_get_counter_status_doit(struct sk_buff *skb, + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry; + struct rdma_hw_stats *stats; + struct ib_device *device; + struct sk_buff *msg; + u32 devid, port; + int ret, i; + + ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); + if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || + !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) + return -EINVAL; + + devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + device = ib_device_get_by_index(sock_net(skb->sk), devid); + if (!device) + return -EINVAL; + + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); + if (!rdma_is_port_valid(device, port)) { + ret = -EINVAL; + goto err; + } + + stats = ib_get_hw_stats_port(device, port); + if (!stats) { + ret = -EINVAL; + goto err; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto err; + } + + nlh = nlmsg_put( + msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS), + 0, 0); + + ret = -EMSGSIZE; + if (fill_nldev_handle(msg, device) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) + goto err_msg; + + table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); + if (!table) + goto err_msg; + + mutex_lock(&stats->lock); + for (i = 0; i < stats->num_counters; i++) { + entry = nla_nest_start(msg, + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY); + if (!entry) + goto err_msg_table; + + if (nla_put_string(msg, + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME, + stats->descs[i].name) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i)) + goto err_msg_entry; + + if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) && + (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC, + !test_bit(i, stats->is_disabled)))) + goto err_msg_entry; + + nla_nest_end(msg, entry); + } + mutex_unlock(&stats->lock); + + nla_nest_end(msg, table); + nlmsg_end(msg, nlh); + ib_device_put(device); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); + +err_msg_entry: + nla_nest_cancel(msg, entry); +err_msg_table: + mutex_unlock(&stats->lock); + nla_nest_cancel(msg, table); +err_msg: + nlmsg_free(msg); +err: + ib_device_put(device); + return ret; +} + static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_GET] = { .doit = nldev_get_doit, @@ -2353,6 +2448,9 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { .dump = nldev_res_get_mr_raw_dumpit, .flags = RDMA_NL_ADMIN_PERM, }, + [RDMA_NLDEV_CMD_STAT_GET_STATUS] = { + .doit = nldev_stat_get_counter_status_doit, + }, }; void __init nldev_init(void) -- cgit From 822cf785ac6d9120386f59964d6d029f3f04a8e3 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Fri, 8 Oct 2021 15:24:34 +0300 Subject: RDMA/nldev: Split nldev_stat_set_mode_doit out of nldev_stat_set_doit In order to allow expansion of the set command with more set options, take the set mode out of the main set function. Link: https://lore.kernel.org/r/20211008122439.166063-9-markzhang@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky Signed-off-by: Mark Zhang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/nldev.c | 116 ++++++++++++++++++++++++---------------- 1 file changed, 70 insertions(+), 46 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 210057fef7bd..8361eb08e13b 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1897,24 +1897,67 @@ static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } +static int nldev_stat_set_mode_doit(struct sk_buff *msg, + struct netlink_ext_ack *extack, + struct nlattr *tb[], + struct ib_device *device, u32 port) +{ + u32 mode, mask = 0, qpn, cntn = 0; + int ret; + + /* Currently only counter for QP is supported */ + if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) + return -EINVAL; + + mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]); + if (mode == RDMA_COUNTER_MODE_AUTO) { + if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]) + mask = nla_get_u32( + tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]); + return rdma_counter_set_auto_mode(device, port, mask, extack); + } + + if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) + return -EINVAL; + + qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); + if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) { + cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); + ret = rdma_counter_bind_qpn(device, port, qpn, cntn); + if (ret) + return ret; + } else { + ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn); + if (ret) + return ret; + } + + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { + ret = -EMSGSIZE; + goto err_fill; + } + + return 0; + +err_fill: + rdma_counter_unbind_qpn(device, port, qpn, cntn); + return ret; +} + static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { - u32 index, port, mode, mask = 0, qpn, cntn = 0; struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; struct ib_device *device; struct sk_buff *msg; + u32 index, port; int ret; - ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); - /* Currently only counter for QP is supported */ - if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] || - !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || - !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_MODE]) - return -EINVAL; - - if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) + ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, + extack); + if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || + !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); @@ -1925,59 +1968,40 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); if (!rdma_is_port_valid(device, port)) { ret = -EINVAL; - goto err; + goto err_put_device; + } + + if (!tb[RDMA_NLDEV_ATTR_STAT_MODE]) { + ret = -EINVAL; + goto err_put_device; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto err; + goto err_put_device; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_SET), 0, 0); - - mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]); - if (mode == RDMA_COUNTER_MODE_AUTO) { - if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]) - mask = nla_get_u32( - tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]); - ret = rdma_counter_set_auto_mode(device, port, mask, extack); - if (ret) - goto err_msg; - } else { - if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) - goto err_msg; - qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); - if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) { - cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); - ret = rdma_counter_bind_qpn(device, port, qpn, cntn); - } else { - ret = rdma_counter_bind_qpn_alloc(device, port, - qpn, &cntn); - } - if (ret) - goto err_msg; - - if (fill_nldev_handle(msg, device) || - nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || - nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || - nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { - ret = -EMSGSIZE; - goto err_fill; - } + if (fill_nldev_handle(msg, device) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { + ret = -EMSGSIZE; + goto err_free_msg; } + ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port); + if (ret) + goto err_free_msg; + nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); -err_fill: - rdma_counter_unbind_qpn(device, port, qpn, cntn); -err_msg: +err_free_msg: nlmsg_free(msg); -err: +err_put_device: ib_device_put(device); return ret; } -- cgit From 3c3c1f1416392382faa0238e76a70d7810aab2ef Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Fri, 8 Oct 2021 15:24:35 +0300 Subject: RDMA/nldev: Allow optional-counter status configuration through RDMA netlink Provide an option to allow users to enable/disable optional counters through RDMA netlink. Limiting it to users with ADMIN capability only. Examples: 1. Enable optional counters cc_rx_ce_pkts and cc_rx_cnp_pkts (and disable all others): $ sudo rdma statistic set link rocep8s0f0/1 optional-counters \ cc_rx_ce_pkts,cc_rx_cnp_pkts 2. Remove all optional counters: $ sudo rdma statistic unset link rocep8s0f0/1 optional-counters Link: https://lore.kernel.org/r/20211008122439.166063-10-markzhang@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky Signed-off-by: Mark Zhang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/nldev.c | 61 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 8361eb08e13b..fedc0fa6ebf9 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1945,6 +1945,50 @@ err_fill: return ret; } +static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[], + struct ib_device *device, + u32 port) +{ + struct rdma_hw_stats *stats; + int rem, i, index, ret = 0; + struct nlattr *entry_attr; + unsigned long *target; + + stats = ib_get_hw_stats_port(device, port); + if (!stats) + return -EINVAL; + + target = kcalloc(BITS_TO_LONGS(stats->num_counters), + sizeof(*stats->is_disabled), GFP_KERNEL); + if (!target) + return -ENOMEM; + + nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS], + rem) { + index = nla_get_u32(entry_attr); + if ((index >= stats->num_counters) || + !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) { + ret = -EINVAL; + goto out; + } + + set_bit(index, target); + } + + for (i = 0; i < stats->num_counters; i++) { + if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL)) + continue; + + ret = rdma_counter_modify(device, port, i, test_bit(i, target)); + if (ret) + goto out; + } + +out: + kfree(target); + return ret; +} + static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -1971,7 +2015,8 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err_put_device; } - if (!tb[RDMA_NLDEV_ATTR_STAT_MODE]) { + if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] && + !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) { ret = -EINVAL; goto err_put_device; } @@ -1991,9 +2036,17 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err_free_msg; } - ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port); - if (ret) - goto err_free_msg; + if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) { + ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port); + if (ret) + goto err_free_msg; + } + + if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) { + ret = nldev_stat_set_counter_dynamic_doit(tb, device, port); + if (ret) + goto err_free_msg; + } nlmsg_end(msg, nlh); ib_device_put(device); -- cgit From 886773d24962799e5eb3a1b31a9b77e32d6c25a1 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Fri, 8 Oct 2021 15:24:36 +0300 Subject: RDMA/mlx5: Support optional counters in hw_stats initialization Add optional counter support when allocate and initialize hw_stats structure. Optional counters have IB_STAT_FLAG_OPTIONAL flag set and are disabled by default. Link: https://lore.kernel.org/r/20211008122439.166063-11-markzhang@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky Signed-off-by: Mark Zhang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/counters.c | 90 ++++++++++++++++++++++++++++------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + 2 files changed, 75 insertions(+), 16 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index caa35ea14b48..8fe7900b29f0 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -75,6 +75,21 @@ static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), }; +#define INIT_OP_COUNTER(_name) \ + { .name = #_name } + +static const struct mlx5_ib_counter basic_op_cnts[] = { + INIT_OP_COUNTER(cc_rx_ce_pkts), +}; + +static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = { + INIT_OP_COUNTER(cc_rx_cnp_pkts), +}; + +static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = { + INIT_OP_COUNTER(cc_tx_cnp_pkts), +}; + static int mlx5_ib_read_counters(struct ib_counters *counters, struct ib_counters_read_attr *read_attr, struct uverbs_attr_bundle *attrs) @@ -161,17 +176,34 @@ u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num) return cnts->set_id; } +static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts) +{ + struct rdma_hw_stats *stats; + u32 num_hw_counters; + int i; + + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + stats = rdma_alloc_hw_stats_struct(cnts->descs, + num_hw_counters + + cnts->num_op_counters, + RDMA_HW_STATS_DEFAULT_LIFESPAN); + if (!stats) + return NULL; + + for (i = 0; i < cnts->num_op_counters; i++) + set_bit(num_hw_counters + i, stats->is_disabled); + + return stats; +} + static struct rdma_hw_stats * mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev) { struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct mlx5_ib_counters *cnts = &dev->port[0].cnts; - return rdma_alloc_hw_stats_struct(cnts->descs, - cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); + return do_alloc_stats(cnts); } static struct rdma_hw_stats * @@ -180,11 +212,7 @@ mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num) struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts; - return rdma_alloc_hw_stats_struct(cnts->descs, - cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); + return do_alloc_stats(cnts); } static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, @@ -302,11 +330,7 @@ mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port - 1); - return rdma_alloc_hw_stats_struct(cnts->descs, - cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); + return do_alloc_stats(cnts); } static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) @@ -423,13 +447,34 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, offsets[j] = ext_ppcnt_cnts[i].offset; } } + + for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) { + descs[j].name = basic_op_cnts[i].name; + descs[j].flags |= IB_STAT_FLAG_OPTIONAL; + } + + if (MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_receive_rdma.bth_opcode)) { + for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) { + descs[j].name = rdmarx_cnp_op_cnts[i].name; + descs[j].flags |= IB_STAT_FLAG_OPTIONAL; + } + } + + if (MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_transmit_rdma.bth_opcode)) { + for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) { + descs[j].name = rdmatx_cnp_op_cnts[i].name; + descs[j].flags |= IB_STAT_FLAG_OPTIONAL; + } + } } static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, struct mlx5_ib_counters *cnts) { - u32 num_counters; + u32 num_counters, num_op_counters; num_counters = ARRAY_SIZE(basic_q_cnts); @@ -455,6 +500,19 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); num_counters += ARRAY_SIZE(ext_ppcnt_cnts); } + + num_op_counters = ARRAY_SIZE(basic_op_cnts); + + if (MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_receive_rdma.bth_opcode)) + num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts); + + if (MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_transmit_rdma.bth_opcode)) + num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts); + + cnts->num_op_counters = num_op_counters; + num_counters += num_op_counters; cnts->descs = kcalloc(num_counters, sizeof(struct rdma_stat_desc), GFP_KERNEL); if (!cnts->descs) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 6f5451d96dd7..8215d7ab579d 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -803,6 +803,7 @@ struct mlx5_ib_counters { u32 num_q_counters; u32 num_cong_counters; u32 num_ext_ppcnt_counters; + u32 num_op_counters; u16 set_id; }; -- cgit From ffa501ef196312f550818304e1558025a11d3c2f Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Fri, 8 Oct 2021 15:24:37 +0300 Subject: RDMA/mlx5: Add steering support in optional flow counters Adding steering infrastructure for adding and removing optional counter. This allows to add and remove the counters dynamically in order not to hurt performance. Link: https://lore.kernel.org/r/20211008122439.166063-12-markzhang@nvidia.com Signed-off-by: Aharon Landau Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Mark Zhang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/fs.c | 187 +++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 24 +++++ 2 files changed, 211 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 5fbc0a8454b9..b780185d9dc6 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -10,12 +10,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include "mlx5_ib.h" #include "counters.h" #include "devx.h" @@ -847,6 +849,191 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, return prio; } +enum { + RDMA_RX_ECN_OPCOUNTER_PRIO, + RDMA_RX_CNP_OPCOUNTER_PRIO, +}; + +enum { + RDMA_TX_CNP_OPCOUNTER_PRIO, +}; + +static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_flow_spec *spec) +{ + if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, + ft_field_support.source_vhca_port) || + !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, + ft_field_support.source_vhca_port)) + return -EOPNOTSUPP; + + MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria, + misc_parameters.source_vhca_port); + MLX5_SET(fte_match_param, &spec->match_value, + misc_parameters.source_vhca_port, port_num); + + return 0; +} + +static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_flow_spec *spec, int ipv) +{ + if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, + ft_field_support.outer_ip_version)) + return -EOPNOTSUPP; + + if (mlx5_core_mp_enabled(dev->mdev) && + set_vhca_port_spec(dev, port_num, spec)) + return -EOPNOTSUPP; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.ip_ecn); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn, + INET_ECN_CE); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, + ipv); + + spec->match_criteria_enable = + get_match_criteria_enable(spec->match_criteria); + + return 0; +} + +static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_flow_spec *spec) +{ + if (mlx5_core_mp_enabled(dev->mdev) && + set_vhca_port_spec(dev, port_num, spec)) + return -EOPNOTSUPP; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters.bth_opcode); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode, + IB_BTH_OPCODE_CNP); + + spec->match_criteria_enable = + get_match_criteria_enable(spec->match_criteria); + + return 0; +} + +int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_ib_op_fc *opfc, + enum mlx5_ib_optional_counter_type type) +{ + enum mlx5_flow_namespace_type fn_type; + int priority, i, err, spec_num; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_destination dst; + struct mlx5_flow_namespace *ns; + struct mlx5_ib_flow_prio *prio; + struct mlx5_flow_spec *spec; + + spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + switch (type) { + case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS: + if (set_ecn_ce_spec(dev, port_num, &spec[0], + MLX5_FS_IPV4_VERSION) || + set_ecn_ce_spec(dev, port_num, &spec[1], + MLX5_FS_IPV6_VERSION)) { + err = -EOPNOTSUPP; + goto free; + } + spec_num = 2; + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; + priority = RDMA_RX_ECN_OPCOUNTER_PRIO; + break; + + case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS: + if (!MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_receive_rdma.bth_opcode) || + set_cnp_spec(dev, port_num, &spec[0])) { + err = -EOPNOTSUPP; + goto free; + } + spec_num = 1; + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; + priority = RDMA_RX_CNP_OPCOUNTER_PRIO; + break; + + case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS: + if (!MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_transmit_rdma.bth_opcode) || + set_cnp_spec(dev, port_num, &spec[0])) { + err = -EOPNOTSUPP; + goto free; + } + spec_num = 1; + fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS; + priority = RDMA_TX_CNP_OPCOUNTER_PRIO; + break; + + default: + err = -EOPNOTSUPP; + goto free; + } + + ns = mlx5_get_flow_namespace(dev->mdev, fn_type); + if (!ns) { + err = -EOPNOTSUPP; + goto free; + } + + prio = &dev->flow_db->opfcs[type]; + if (!prio->flow_table) { + prio = _get_prio(ns, prio, priority, + dev->num_ports * MAX_OPFC_RULES, 1, 0); + if (IS_ERR(prio)) { + err = PTR_ERR(prio); + goto free; + } + } + + dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dst.counter_id = mlx5_fc_id(opfc->fc); + + flow_act.action = + MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW; + + for (i = 0; i < spec_num; i++) { + opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i], + &flow_act, &dst, 1); + if (IS_ERR(opfc->rule[i])) { + err = PTR_ERR(opfc->rule[i]); + goto del_rules; + } + } + prio->refcount += spec_num; + kfree(spec); + + return 0; + +del_rules: + for (i -= 1; i >= 0; i--) + mlx5_del_flow_rules(opfc->rule[i]); + put_flow_table(dev, prio, false); +free: + kfree(spec); + return err; +} + +void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev, + struct mlx5_ib_op_fc *opfc, + enum mlx5_ib_optional_counter_type type) +{ + int i; + + for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) { + mlx5_del_flow_rules(opfc->rule[i]); + put_flow_table(dev, &dev->flow_db->opfcs[type], true); + } +} + static void set_underlay_qp(struct mlx5_ib_dev *dev, struct mlx5_flow_spec *spec, u32 underlay_qpn) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 8215d7ab579d..d81ff5078e5e 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -263,6 +263,14 @@ struct mlx5_ib_pp { struct mlx5_core_dev *mdev; }; +enum mlx5_ib_optional_counter_type { + MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS, + MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS, + MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS, + + MLX5_IB_OPCOUNTER_MAX, +}; + struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT]; @@ -271,6 +279,7 @@ struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio fdb; struct mlx5_ib_flow_prio rdma_rx[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT]; + struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX]; struct mlx5_flow_table *lag_demux_ft; /* Protect flow steering bypass flow tables * when add/del flow rules. @@ -797,6 +806,13 @@ struct mlx5_ib_resources { struct mlx5_ib_port_resources ports[2]; }; +#define MAX_OPFC_RULES 2 + +struct mlx5_ib_op_fc { + struct mlx5_fc *fc; + struct mlx5_flow_handle *rule[MAX_OPFC_RULES]; +}; + struct mlx5_ib_counters { struct rdma_stat_desc *descs; size_t *offsets; @@ -807,6 +823,14 @@ struct mlx5_ib_counters { u16 set_id; }; +int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_ib_op_fc *opfc, + enum mlx5_ib_optional_counter_type type); + +void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev, + struct mlx5_ib_op_fc *opfc, + enum mlx5_ib_optional_counter_type type); + struct mlx5_ib_multiport_info; struct mlx5_ib_multiport { -- cgit From a29b934ceb4c8ad8bf6d0486a8c8ef015af494f4 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Fri, 8 Oct 2021 15:24:38 +0300 Subject: RDMA/mlx5: Add modify_op_stat() support Add support for ib callback modify_op_stat() to add or remove an optional counter. When adding, a steering flow table is created with a rule that catches and counts all the matching packets. When removing, the table and flow counter are destroyed. Link: https://lore.kernel.org/r/20211008122439.166063-13-markzhang@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky Signed-off-by: Mark Zhang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/counters.c | 79 ++++++++++++++++++++++++++++++++--- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + 2 files changed, 74 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 8fe7900b29f0..6ee340c63b20 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -12,6 +12,7 @@ struct mlx5_ib_counter { const char *name; size_t offset; + u32 type; }; #define INIT_Q_COUNTER(_name) \ @@ -75,19 +76,19 @@ static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), }; -#define INIT_OP_COUNTER(_name) \ - { .name = #_name } +#define INIT_OP_COUNTER(_name, _type) \ + { .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type} static const struct mlx5_ib_counter basic_op_cnts[] = { - INIT_OP_COUNTER(cc_rx_ce_pkts), + INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS), }; static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = { - INIT_OP_COUNTER(cc_rx_cnp_pkts), + INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS), }; static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = { - INIT_OP_COUNTER(cc_tx_cnp_pkts), + INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS), }; static int mlx5_ib_read_counters(struct ib_counters *counters, @@ -451,6 +452,7 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) { descs[j].name = basic_op_cnts[i].name; descs[j].flags |= IB_STAT_FLAG_OPTIONAL; + descs[j].priv = &basic_op_cnts[i].type; } if (MLX5_CAP_FLOWTABLE(dev->mdev, @@ -458,6 +460,7 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) { descs[j].name = rdmarx_cnp_op_cnts[i].name; descs[j].flags |= IB_STAT_FLAG_OPTIONAL; + descs[j].priv = &rdmarx_cnp_op_cnts[i].type; } } @@ -466,6 +469,7 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) { descs[j].name = rdmatx_cnp_op_cnts[i].name; descs[j].flags |= IB_STAT_FLAG_OPTIONAL; + descs[j].priv = &rdmatx_cnp_op_cnts[i].type; } } } @@ -535,7 +539,7 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) { u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; int num_cnt_ports; - int i; + int i, j; num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; @@ -550,6 +554,18 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) } kfree(dev->port[i].cnts.descs); kfree(dev->port[i].cnts.offsets); + + for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) { + if (!dev->port[i].cnts.opfcs[j].fc) + continue; + + mlx5_ib_fs_remove_op_fc(dev, + &dev->port[i].cnts.opfcs[j], + j); + mlx5_fc_destroy(dev->mdev, + dev->port[i].cnts.opfcs[j].fc); + dev->port[i].cnts.opfcs[j].fc = NULL; + } } } @@ -729,6 +745,56 @@ void mlx5_ib_counters_clear_description(struct ib_counters *counters) mutex_unlock(&mcounters->mcntrs_mutex); } +static int mlx5_ib_modify_stat(struct ib_device *device, u32 port, + unsigned int index, bool enable) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_ib_counters *cnts; + struct mlx5_ib_op_fc *opfc; + u32 num_hw_counters, type; + int ret; + + cnts = &dev->port[port - 1].cnts; + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + if (index < num_hw_counters || + index >= (num_hw_counters + cnts->num_op_counters)) + return -EINVAL; + + if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) + return -EINVAL; + + type = *(u32 *)cnts->descs[index].priv; + if (type >= MLX5_IB_OPCOUNTER_MAX) + return -EINVAL; + + opfc = &cnts->opfcs[type]; + + if (enable) { + if (opfc->fc) + return -EEXIST; + + opfc->fc = mlx5_fc_create(dev->mdev, false); + if (IS_ERR(opfc->fc)) + return PTR_ERR(opfc->fc); + + ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type); + if (ret) { + mlx5_fc_destroy(dev->mdev, opfc->fc); + opfc->fc = NULL; + } + return ret; + } + + if (!opfc->fc) + return -EINVAL; + + mlx5_ib_fs_remove_op_fc(dev, opfc, type); + mlx5_fc_destroy(dev->mdev, opfc->fc); + opfc->fc = NULL; + return 0; +} + static const struct ib_device_ops hw_stats_ops = { .alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats, .get_hw_stats = mlx5_ib_get_hw_stats, @@ -737,6 +803,7 @@ static const struct ib_device_ops hw_stats_ops = { .counter_dealloc = mlx5_ib_counter_dealloc, .counter_alloc_stats = mlx5_ib_counter_alloc_stats, .counter_update_stats = mlx5_ib_counter_update_stats, + .modify_hw_stat = mlx5_ib_modify_stat, }; static const struct ib_device_ops hw_switchdev_stats_ops = { diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d81ff5078e5e..cf8b0653f0ce 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -821,6 +821,7 @@ struct mlx5_ib_counters { u32 num_ext_ppcnt_counters; u32 num_op_counters; u16 set_id; + struct mlx5_ib_op_fc opfcs[MLX5_IB_OPCOUNTER_MAX]; }; int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, -- cgit From a020094090e5293abd5c2ecdcec96a275f56c4ef Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Fri, 8 Oct 2021 15:24:39 +0300 Subject: RDMA/mlx5: Add optional counter support in get_hw_stats callback When get_hw_stats is called, query and return the optional counter statistic as well. Link: https://lore.kernel.org/r/20211008122439.166063-14-markzhang@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky Signed-off-by: Mark Zhang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/counters.c | 88 +++++++++++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 6ee340c63b20..6f1c4b57110e 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -270,9 +270,9 @@ free: return ret; } -static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, - struct rdma_hw_stats *stats, - u32 port_num, int index) +static int do_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u32 port_num, int index) { struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); @@ -324,6 +324,88 @@ done: return num_counters; } +static int do_get_op_stat(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u32 port_num, int index) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + const struct mlx5_ib_counters *cnts; + const struct mlx5_ib_op_fc *opfcs; + u64 packets = 0, bytes; + u32 type; + int ret; + + cnts = get_counters(dev, port_num - 1); + opfcs = cnts->opfcs; + type = *(u32 *)cnts->descs[index].priv; + if (type >= MLX5_IB_OPCOUNTER_MAX) + return -EINVAL; + + if (!opfcs[type].fc) + goto out; + + ret = mlx5_fc_query(dev->mdev, opfcs[type].fc, + &packets, &bytes); + if (ret) + return ret; + +out: + stats->value[index] = packets; + return index; +} + +static int do_get_op_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u32 port_num) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + const struct mlx5_ib_counters *cnts; + int index, ret, num_hw_counters; + + cnts = get_counters(dev, port_num - 1); + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + for (index = num_hw_counters; + index < (num_hw_counters + cnts->num_op_counters); index++) { + ret = do_get_op_stat(ibdev, stats, port_num, index); + if (ret != index) + return ret; + } + + return cnts->num_op_counters; +} + +static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u32 port_num, int index) +{ + int num_counters, num_hw_counters, num_op_counters; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + const struct mlx5_ib_counters *cnts; + + cnts = get_counters(dev, port_num - 1); + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + num_counters = num_hw_counters + cnts->num_op_counters; + + if (index < 0 || index > num_counters) + return -EINVAL; + else if (index > 0 && index < num_hw_counters) + return do_get_hw_stats(ibdev, stats, port_num, index); + else if (index >= num_hw_counters && index < num_counters) + return do_get_op_stat(ibdev, stats, port_num, index); + + num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index); + if (num_hw_counters < 0) + return num_hw_counters; + + num_op_counters = do_get_op_stats(ibdev, stats, port_num); + if (num_op_counters < 0) + return num_op_counters; + + return num_hw_counters + num_op_counters; +} + static struct rdma_hw_stats * mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) { -- cgit From 9a33f3980978e420672b0b54e3fa0c7908fd7c78 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Sun, 26 Sep 2021 14:11:15 +0800 Subject: RDMA/hns: Use dma_alloc_coherent() instead of kmalloc/dma_map_single() Replacing kmalloc/kfree/dma_map_single/dma_unmap_single() with dma_alloc_coherent/dma_free_coherent() helps to reduce code size, and simplify the code, and coherent DMA will not clear the cache every time. The SOC that this driver supports does not have incoherent DMA, so this makes the code follow the DMA API properly with no performance impact. Currently there are missing dma sync calls around the DMA transfers. Link: https://lore.kernel.org/r/20210926061116.282-1-caihuoqing@baidu.com Signed-off-by: Cai Huoqing Reviewed-by: Wenpeng Liang Tested-by: Wenpeng Liang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index d5f3faa1627a..91990fad7185 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1165,32 +1165,22 @@ static int hns_roce_alloc_cmq_desc(struct hns_roce_dev *hr_dev, { int size = ring->desc_num * sizeof(struct hns_roce_cmq_desc); - ring->desc = kzalloc(size, GFP_KERNEL); + ring->desc = dma_alloc_coherent(hr_dev->dev, size, + &ring->desc_dma_addr, GFP_KERNEL); if (!ring->desc) return -ENOMEM; - ring->desc_dma_addr = dma_map_single(hr_dev->dev, ring->desc, size, - DMA_BIDIRECTIONAL); - if (dma_mapping_error(hr_dev->dev, ring->desc_dma_addr)) { - ring->desc_dma_addr = 0; - kfree(ring->desc); - ring->desc = NULL; - - return -ENOMEM; - } - return 0; } static void hns_roce_free_cmq_desc(struct hns_roce_dev *hr_dev, struct hns_roce_v2_cmq_ring *ring) { - dma_unmap_single(hr_dev->dev, ring->desc_dma_addr, - ring->desc_num * sizeof(struct hns_roce_cmq_desc), - DMA_BIDIRECTIONAL); + dma_free_coherent(hr_dev->dev, + ring->desc_num * sizeof(struct hns_roce_cmq_desc), + ring->desc, ring->desc_dma_addr); ring->desc_dma_addr = 0; - kfree(ring->desc); } static int init_csq(struct hns_roce_dev *hr_dev, -- cgit From 4bd46f3a986db97fdb67b05a6b379499fce179ea Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 7 Oct 2021 18:39:42 +0100 Subject: RDMA/iwpm: Remove redundant initialization of pointer err_str The pointer err_str is being initialized with a value that is never read, it is being updated later on. The assignment is redundant and can be removed. Link: https://lore.kernel.org/r/20211007173942.21933-1-colin.king@canonical.com Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/iwpm_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 54f4feb604d8..358a2db38d23 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -762,7 +762,7 @@ int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version) { struct sk_buff *skb = NULL; struct nlmsghdr *nlh; - const char *err_str = ""; + const char *err_str; int ret = -EINVAL; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_HELLO, &nlh, nl_client); -- cgit From 8869574a6c11694dda6598c72b8e96cd1ac4e673 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 10 Oct 2021 16:08:10 +0200 Subject: RDMA: Remove redundant 'flush_workqueue()' calls 'destroy_workqueue()' already drains the queue before destroying it, so there is no need to flush it explicitly. Remove the redundant 'flush_workqueue()' calls. This was generated with coccinelle: @@ expression E; @@ - flush_workqueue(E); destroy_workqueue(E); Link: https://lore.kernel.org/r/ca7bac6e6c9c5cc8d04eec3944edb13de0e381a3.1633874776.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sa_query.c | 1 - drivers/infiniband/hw/cxgb4/cm.c | 1 - drivers/infiniband/hw/cxgb4/device.c | 1 - drivers/infiniband/hw/mlx4/alias_GUID.c | 4 +--- drivers/infiniband/sw/siw/siw_cm.c | 4 +--- drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 - 6 files changed, 2 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index a20b8108e160..4220a545387f 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -2261,7 +2261,6 @@ err1: void ib_sa_cleanup(void) { cancel_delayed_work(&ib_nl_timed_work); - flush_workqueue(ib_nl_wq); destroy_workqueue(ib_nl_wq); mcast_cleanup(); ib_unregister_client(&sa_client); diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 291471d12197..913f39ee4416 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -4464,6 +4464,5 @@ int __init c4iw_cm_init(void) void c4iw_cm_term(void) { WARN_ON(!list_empty(&timeout_list)); - flush_workqueue(workq); destroy_workqueue(workq); } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 541dbcf22d0e..80970a1738f8 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -1562,7 +1562,6 @@ static void __exit c4iw_exit_module(void) kfree(ctx); } mutex_unlock(&dev_mutex); - flush_workqueue(reg_workq); destroy_workqueue(reg_workq); cxgb4_unregister_uld(CXGB4_ULD_RDMA); c4iw_cm_term(); diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 571d9c542024..e2e1f5daddc4 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -822,10 +822,8 @@ void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev) } spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags); } - for (i = 0 ; i < dev->num_ports; i++) { - flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); + for (i = 0 ; i < dev->num_ports; i++) destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); - } ib_sa_unregister_client(dev->sriov.alias_guid.sa_client); kfree(dev->sriov.alias_guid.sa_client); } diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 7a5ed86ffc9f..7acdd3c3a599 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1951,8 +1951,6 @@ int siw_cm_init(void) void siw_cm_exit(void) { - if (siw_cm_wq) { - flush_workqueue(siw_cm_wq); + if (siw_cm_wq) destroy_workqueue(siw_cm_wq); - } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 0aa8629fdf62..9c9da5aa592a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1997,7 +1997,6 @@ static void ipoib_ndo_uninit(struct net_device *dev) if (priv->wq) { /* See ipoib_mcast_carrier_on_task() */ WARN_ON(test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)); - flush_workqueue(priv->wq); destroy_workqueue(priv->wq); priv->wq = NULL; } -- cgit From 0bed5dfa5af8304c99962aa713a9ecd67ee52cb8 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Mon, 11 Oct 2021 07:01:25 -0400 Subject: RDMA/irdma: Remove irdma_uk_mw_bind() The function irdma_uk_mw_bind is not used. So remove it. Link: https://lore.kernel.org/r/20211011110128.4057-2-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Acked-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/uk.c | 57 -------------------------------------- drivers/infiniband/hw/irdma/user.h | 4 +-- 2 files changed, 1 insertion(+), 60 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index 5fb92de1f015..ebcd93bb9e9d 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -866,63 +866,6 @@ irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, return 0; } -/** - * irdma_uk_mw_bind - bind Memory Window - * @qp: hw qp ptr - * @info: post sq information - * @post_sq: flag to post sq - */ -enum irdma_status_code irdma_uk_mw_bind(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool post_sq) -{ - __le64 *wqe; - struct irdma_bind_window *op_info; - u64 hdr; - u32 wqe_idx; - bool local_fence = false; - - info->push_wqe = qp->push_db ? true : false; - op_info = &info->op.bind_window; - local_fence |= info->local_fence; - - wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA, - 0, info); - if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; - - irdma_clr_wqes(qp, wqe_idx); - - qp->wqe_ops.iw_set_mw_bind_wqe(wqe, op_info); - - hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMA_OP_TYPE_BIND_MW) | - FIELD_PREP(IRDMAQPSQ_STAGRIGHTS, - ((op_info->ena_reads << 2) | (op_info->ena_writes << 3))) | - FIELD_PREP(IRDMAQPSQ_VABASEDTO, - (op_info->addressing_type == IRDMA_ADDR_TYPE_VA_BASED ? 1 : 0)) | - FIELD_PREP(IRDMAQPSQ_MEMWINDOWTYPE, - (op_info->mem_window_type_1 ? 1 : 0)) | - FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | - FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | - FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) | - FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | - FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); - - dma_wmb(); /* make sure WQE is populated before valid bit is set */ - - set_64bit_val(wqe, 24, hdr); - - if (info->push_wqe) { - irdma_qp_push_wqe(qp, wqe, IRDMA_QP_WQE_MIN_QUANTA, wqe_idx, - post_sq); - } else { - if (post_sq) - irdma_uk_qp_post_wr(qp); - } - - return 0; -} - /** * irdma_uk_post_receive - post receive wqe * @qp: hw qp ptr diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index 3dcbb1fbf2c6..31d5e4e3f442 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -283,9 +283,7 @@ enum irdma_status_code irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, enum irdma_status_code irdma_uk_inline_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq); -enum irdma_status_code irdma_uk_mw_bind(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool post_sq); + enum irdma_status_code irdma_uk_post_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq); enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp, -- cgit From 6d2682216d1fd2b54ee16dbb6afd8a597645dbad Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Mon, 11 Oct 2021 07:01:26 -0400 Subject: RDMA/irdma: Remove irdma_sc_send_lsmm_nostag() The function irdma_sc_send_lsmm_nostag is not used. So remove it. Link: https://lore.kernel.org/r/20211011110128.4057-3-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Acked-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/ctrl.c | 38 -------------------------------------- drivers/infiniband/hw/irdma/type.h | 2 +- 2 files changed, 1 insertion(+), 39 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index f1e5515256e0..729fa8a3f6f8 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -1419,44 +1419,6 @@ void irdma_sc_send_lsmm(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size, irdma_sc_gen_rts_ae(qp); } -/** - * irdma_sc_send_lsmm_nostag - for privilege qp - * @qp: sc qp struct - * @lsmm_buf: buffer with lsmm message - * @size: size of lsmm buffer - */ -void irdma_sc_send_lsmm_nostag(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size) -{ - __le64 *wqe; - u64 hdr; - struct irdma_qp_uk *qp_uk; - - qp_uk = &qp->qp_uk; - wqe = qp_uk->sq_base->elem; - - set_64bit_val(wqe, 0, (uintptr_t)lsmm_buf); - - if (qp->qp_uk.uk_attrs->hw_rev == IRDMA_GEN_1) - set_64bit_val(wqe, 8, - FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, size)); - else - set_64bit_val(wqe, 8, - FIELD_PREP(IRDMAQPSQ_FRAG_LEN, size) | - FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity)); - set_64bit_val(wqe, 16, 0); - - hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_RDMA_SEND) | - FIELD_PREP(IRDMAQPSQ_STREAMMODE, 1) | - FIELD_PREP(IRDMAQPSQ_WAITFORRCVPDU, 1) | - FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity); - dma_wmb(); /* make sure WQE is written before valid bit is set */ - - set_64bit_val(wqe, 24, hdr); - - print_hex_dump_debug("WQE: SEND_LSMM_NOSTAG WQE", DUMP_PREFIX_OFFSET, - 16, 8, wqe, IRDMA_QP_WQE_MIN_SIZE, false); -} - /** * irdma_sc_send_rtt - send last read0 or write0 * @qp: sc qp struct diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 874bc25a938b..4312f2070534 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -1256,7 +1256,7 @@ enum irdma_status_code irdma_sc_qp_modify(struct irdma_sc_qp *qp, u64 scratch, bool post_sq); void irdma_sc_send_lsmm(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size, irdma_stag stag); -void irdma_sc_send_lsmm_nostag(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size); + void irdma_sc_send_rtt(struct irdma_sc_qp *qp, bool read); void irdma_sc_qp_setctx(struct irdma_sc_qp *qp, __le64 *qp_ctx, struct irdma_qp_host_ctx_info *info); -- cgit From 16ddcfca567114ff8618ceb0bba001d8b15129e7 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Mon, 11 Oct 2021 07:01:27 -0400 Subject: RDMA/irdma: Remove irdma_get_hw_addr() The function irdma_get_hw_addr() is not used. So remove it. Link: https://lore.kernel.org/r/20211011110128.4057-4-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Acked-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/osdep.h | 1 - drivers/infiniband/hw/irdma/utils.c | 11 ----------- 2 files changed, 12 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/osdep.h b/drivers/infiniband/hw/irdma/osdep.h index b2ab52335ca6..63d8bb3a6903 100644 --- a/drivers/infiniband/hw/irdma/osdep.h +++ b/drivers/infiniband/hw/irdma/osdep.h @@ -37,7 +37,6 @@ struct irdma_hw; struct irdma_pci_f; struct ib_device *to_ibdev(struct irdma_sc_dev *dev); -u8 __iomem *irdma_get_hw_addr(void *dev); void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp); enum irdma_status_code irdma_vf_wait_vchnl_resp(struct irdma_sc_dev *dev); bool irdma_vf_clear_to_send(struct irdma_sc_dev *dev); diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index ac91ea5296db..84bc7b659d76 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -767,17 +767,6 @@ struct ib_qp *irdma_get_qp(struct ib_device *device, int qpn) return &iwdev->rf->qp_table[qpn]->ibqp; } -/** - * irdma_get_hw_addr - return hw addr - * @par: points to shared dev - */ -u8 __iomem *irdma_get_hw_addr(void *par) -{ - struct irdma_sc_dev *dev = par; - - return dev->hw->hw_addr; -} - /** * irdma_remove_cqp_head - return head entry and remove * @dev: device -- cgit From 9d8f247cc33c881812d2ae106a79292c0d0446b8 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Mon, 11 Oct 2021 07:01:28 -0400 Subject: RDMA/irdma: Remove irdma_cqp_up_map_cmd() The function irdma_cqp_up_map_cmd() is not used. So remove it. Link: https://lore.kernel.org/r/20211011110128.4057-5-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Acked-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/protos.h | 2 -- drivers/infiniband/hw/irdma/utils.c | 34 ---------------------------------- 2 files changed, 36 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/protos.h b/drivers/infiniband/hw/irdma/protos.h index 78f598fdbccf..a17c0ffb0cc8 100644 --- a/drivers/infiniband/hw/irdma/protos.h +++ b/drivers/infiniband/hw/irdma/protos.h @@ -37,8 +37,6 @@ void irdma_hw_stats_read_all(struct irdma_vsi_pestat *stats, enum irdma_status_code irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd, struct irdma_ws_node_info *node_info); -enum irdma_status_code irdma_cqp_up_map_cmd(struct irdma_sc_dev *dev, u8 cmd, - struct irdma_up_info *map_info); enum irdma_status_code irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_ceq *sc_ceq, u8 op); enum irdma_status_code irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 84bc7b659d76..0ebce57e8756 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -2048,40 +2048,6 @@ exit: return status; } -/** - * irdma_cqp_up_map_cmd - Set the up-up mapping - * @dev: pointer to device structure - * @cmd: map command - * @map_info: pointer to up map info - */ -enum irdma_status_code irdma_cqp_up_map_cmd(struct irdma_sc_dev *dev, u8 cmd, - struct irdma_up_info *map_info) -{ - struct irdma_pci_f *rf = dev_to_rf(dev); - struct irdma_cqp *iwcqp = &rf->cqp; - struct irdma_sc_cqp *cqp = &iwcqp->sc_cqp; - struct irdma_cqp_request *cqp_request; - struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; - - cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, false); - if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; - - cqp_info = &cqp_request->info; - memset(cqp_info, 0, sizeof(*cqp_info)); - cqp_info->cqp_cmd = cmd; - cqp_info->post_sq = 1; - cqp_info->in.u.up_map.info = *map_info; - cqp_info->in.u.up_map.cqp = cqp; - cqp_info->in.u.up_map.scratch = (uintptr_t)cqp_request; - - status = irdma_handle_cqp_op(rf, cqp_request); - irdma_put_cqp_request(&rf->cqp, cqp_request); - - return status; -} - /** * irdma_ah_cqp_op - perform an AH cqp operation * @rf: RDMA PCI function -- cgit From f4e56ec4452f48b8292dcf0e1c4bdac83506fb8b Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 12 Oct 2021 10:28:43 +0300 Subject: RDMA/mlx4: Return missed an error if device doesn't support steering The error flow fixed in this patch is not possible because all kernel users of create QP interface check that device supports steering before set IB_QP_CREATE_NETIF_QP flag. Fixes: c1c98501121e ("IB/mlx4: Add support for steerable IB UD QPs") Link: https://lore.kernel.org/r/91c61f6e60eb0240f8bbc321fda7a1d2986dd03c.1634023677.git.leonro@nvidia.com Reported-by: Dan Carpenter Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/qp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 8662f462e2a5..3a1a4ac9dd33 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1099,8 +1099,10 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) qp->flags |= MLX4_IB_QP_NETIF; - else + else { + err = -EINVAL; goto err; + } } err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp); -- cgit From cfc0312d9c83a5bbb66fa73ba47dd1301d75b2e8 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 7 Oct 2021 15:40:47 -0500 Subject: RDMA/rxe: Move AV from rxe_send_wqe to rxe_send_wr Move the struct rxe_av av from struct rxe_send_wqe to struct rxe_send_wr placing it in wr.ud at the same offset as it was previously. This has the effect of increasing the size of struct rxe_send_wr while keeping the size of struct rxe_send_wqe the same. This better reflects the use of this field which is only used for UD sends. This change has no effect on ABI compatibility so the modified rxe driver will operate with older versions of rdma-core. Link: https://lore.kernel.org/r/20211007204051.10086-2-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_av.c | 2 +- drivers/infiniband/sw/rxe/rxe_verbs.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index da2e867a1ed9..85580ea5eed0 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -107,5 +107,5 @@ struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) if (qp_type(pkt->qp) == IB_QPT_RC || qp_type(pkt->qp) == IB_QPT_UC) return &pkt->qp->pri_av; - return (pkt->wqe) ? &pkt->wqe->av : NULL; + return (pkt->wqe) ? &pkt->wqe->wr.wr.ud.av : NULL; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index c49ba0381964..4233fd9edfd1 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -581,7 +581,8 @@ static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) - memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av)); + memcpy(&wqe->wr.wr.ud.av, &to_rah(ud_wr(ibwr)->ah)->av, + sizeof(struct rxe_av)); if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) copy_inline_data_to_wqe(wqe, ibwr); -- cgit From 99c13a3e29653badefd9ca36f62dfa84db504f1c Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 7 Oct 2021 15:40:48 -0500 Subject: RDMA/rxe: Change AH objects to indexed Make changes to rxe_param.h and rxe_pool.c to allow indexing of AH objects. Valid indices are non-zero so older providers can be detected. Link: https://lore.kernel.org/r/20211007204051.10086-3-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_param.h | 4 +++- drivers/infiniband/sw/rxe/rxe_pool.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index aeffaef8f381..918270e34a35 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -69,7 +69,9 @@ enum rxe_device_param { RXE_MAX_MCAST_GRP = 8192, RXE_MAX_MCAST_QP_ATTACH = 56, RXE_MAX_TOT_MCAST_QP_ATTACH = 0x70000, - RXE_MAX_AH = DEFAULT_MAX_VALUE, + RXE_MAX_AH = (1<<15) - 1, /* 32Ki - 1 */ + RXE_MIN_AH_INDEX = 1, + RXE_MAX_AH_INDEX = RXE_MAX_AH, RXE_MAX_SRQ_WR = DEFAULT_MAX_VALUE, RXE_MIN_SRQ_WR = 1, RXE_MAX_SRQ_SGE = 27, diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index ffa8420b4765..7b4cb46edfd9 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -26,7 +26,9 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { .name = "rxe-ah", .size = sizeof(struct rxe_ah), .elem_offset = offsetof(struct rxe_ah, pelem), - .flags = RXE_POOL_NO_ALLOC, + .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, + .min_index = RXE_MIN_AH_INDEX, + .max_index = RXE_MAX_AH_INDEX, }, [RXE_TYPE_SRQ] = { .name = "rxe-srq", -- cgit From 73a54932100375ba94b31710f1e3f1234f23be0b Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 7 Oct 2021 15:40:49 -0500 Subject: RDMA/rxe: Create AH index and return to user space Make changes to rdma_user_rxe.h to allow indexing AH objects, passing the index in UD send WRs to the driver and returning the index to the rxe provider. Modify rxe_create_ah() to add an index to AH when created and if called from a new user provider return it to user space. If called from an old provider mark the AH as not having a useful index. Modify rxe_destroy_ah to drop the index before deleting the object. Link: https://lore.kernel.org/r/20211007204051.10086-4-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_verbs.c | 31 ++++++++++++++++++++++++++++++- drivers/infiniband/sw/rxe/rxe_verbs.h | 2 ++ 2 files changed, 32 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 4233fd9edfd1..e20f2dd20639 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -158,9 +158,19 @@ static int rxe_create_ah(struct ib_ah *ibah, struct ib_udata *udata) { - int err; struct rxe_dev *rxe = to_rdev(ibah->device); struct rxe_ah *ah = to_rah(ibah); + struct rxe_create_ah_resp __user *uresp = NULL; + int err; + + if (udata) { + /* test if new user provider */ + if (udata->outlen >= sizeof(*uresp)) + uresp = udata->outbuf; + ah->is_user = true; + } else { + ah->is_user = false; + } err = rxe_av_chk_attr(rxe, init_attr->ah_attr); if (err) @@ -170,6 +180,24 @@ static int rxe_create_ah(struct ib_ah *ibah, if (err) return err; + /* create index > 0 */ + rxe_add_index(ah); + ah->ah_num = ah->pelem.index; + + if (uresp) { + /* only if new user provider */ + err = copy_to_user(&uresp->ah_num, &ah->ah_num, + sizeof(uresp->ah_num)); + if (err) { + rxe_drop_index(ah); + rxe_drop_ref(ah); + return -EFAULT; + } + } else if (ah->is_user) { + /* only if old user provider */ + ah->ah_num = 0; + } + rxe_init_av(init_attr->ah_attr, &ah->av); return 0; } @@ -202,6 +230,7 @@ static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags) { struct rxe_ah *ah = to_rah(ibah); + rxe_drop_index(ah); rxe_drop_ref(ah); return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 098fde693dbd..c56fae23b6a9 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -48,6 +48,8 @@ struct rxe_ah { struct rxe_pool_entry pelem; struct rxe_pd *pd; struct rxe_av av; + bool is_user; + int ah_num; }; struct rxe_cqe { -- cgit From 4da698eabf0f7c1c1806670d3fd3895512fdf43b Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 7 Oct 2021 15:40:50 -0500 Subject: RDMA/rxe: Replace ah->pd by ah->ibah.pd The pd field in struct rxe_ah is redundant with the pd field in the rdma-core's ib_ah. Eliminate the pd field in rxe_ah and add an inline to extract the pd from the ibah field. Link: https://lore.kernel.org/r/20211007204051.10086-5-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_verbs.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index c56fae23b6a9..35e041450090 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -46,7 +46,6 @@ struct rxe_pd { struct rxe_ah { struct ib_ah ibah; struct rxe_pool_entry pelem; - struct rxe_pd *pd; struct rxe_av av; bool is_user; int ah_num; @@ -468,6 +467,11 @@ static inline struct rxe_mw *to_rmw(struct ib_mw *mw) return mw ? container_of(mw, struct rxe_mw, ibmw) : NULL; } +static inline struct rxe_pd *rxe_ah_pd(struct rxe_ah *ah) +{ + return to_rpd(ah->ibah.pd); +} + static inline struct rxe_pd *mr_pd(struct rxe_mr *mr) { return to_rpd(mr->ibmr.pd); -- cgit From e2fe06c9080694f2c28894f14ef8ebfd923ac94d Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 7 Oct 2021 15:40:51 -0500 Subject: RDMA/rxe: Lookup kernel AH from ah index in UD WQEs Add code to rxe_get_av in rxe_av.c to use the AH index in UD send WQEs to lookup the kernel AH. For old user providers continue to use the AV passed in WQEs. Move setting pkt->rxe to before the call to rxe_get_av() to get access to the AH pool. Link: https://lore.kernel.org/r/20211007204051.10086-6-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_av.c | 20 +++++++++++++++++++- drivers/infiniband/sw/rxe/rxe_req.c | 8 +++++--- 2 files changed, 24 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index 85580ea5eed0..38c7b6fb39d7 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -101,11 +101,29 @@ void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr) struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) { + struct rxe_ah *ah; + u32 ah_num; + if (!pkt || !pkt->qp) return NULL; if (qp_type(pkt->qp) == IB_QPT_RC || qp_type(pkt->qp) == IB_QPT_UC) return &pkt->qp->pri_av; - return (pkt->wqe) ? &pkt->wqe->wr.wr.ud.av : NULL; + if (!pkt->wqe) + return NULL; + + ah_num = pkt->wqe->wr.wr.ud.ah_num; + if (ah_num) { + /* only new user provider or kernel client */ + ah = rxe_pool_get_index(&pkt->rxe->ah_pool, ah_num); + if (!ah || ah->ah_num != ah_num || rxe_ah_pd(ah) != pkt->qp->pd) { + pr_warn("Unable to find AH matching ah_num\n"); + return NULL; + } + return &ah->av; + } + + /* only old user provider for UD sends*/ + return &pkt->wqe->wr.wr.ud.av; } diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index fe275fcaffbd..0c9d2af15f3d 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -379,9 +379,8 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, /* length from start of bth to end of icrc */ paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; - /* pkt->hdr, rxe, port_num and mask are initialized in ifc - * layer - */ + /* pkt->hdr, port_num and mask are initialized in ifc layer */ + pkt->rxe = rxe; pkt->opcode = opcode; pkt->qp = qp; pkt->psn = qp->req.psn; @@ -391,6 +390,9 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, /* init skb */ av = rxe_get_av(pkt); + if (!av) + return NULL; + skb = rxe_init_packet(rxe, av, paylen, pkt); if (unlikely(!skb)) return NULL; -- cgit From 3b87e0824272414cec79763afef6720c7c908c44 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 7 Oct 2021 15:40:52 -0500 Subject: RDMA/rxe: Convert kernel UD post send to use ah_num Modify ib_post_send for kernel UD sends to put the AH index into the WQE instead of the address vector. Link: https://lore.kernel.org/r/20211007204051.10086-7-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_verbs.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index e20f2dd20639..0aa0d7e52773 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -534,8 +534,11 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) { + struct ib_ah *ibah = ud_wr(ibwr)->ah; + wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn; wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey; + wr->wr.ud.ah_num = to_rah(ibah)->ah_num; if (qp_type(qp) == IB_QPT_GSI) wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index; if (wr->opcode == IB_WR_SEND_WITH_IMM) @@ -607,12 +610,6 @@ static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, return; } - if (qp_type(qp) == IB_QPT_UD || - qp_type(qp) == IB_QPT_SMI || - qp_type(qp) == IB_QPT_GSI) - memcpy(&wqe->wr.wr.ud.av, &to_rah(ud_wr(ibwr)->ah)->av, - sizeof(struct rxe_av)); - if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) copy_inline_data_to_wqe(wqe, ibwr); else -- cgit From dede33da0d97a423aae21b12c3bdd282b832853c Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 19 Oct 2021 11:37:17 -0400 Subject: RDMA/irdma: Make irdma_uk_cq_init() return a void The function irdma_uk_cq_init always returns 0, so make it void and delete all the return value checks. Link: https://lore.kernel.org/r/20211019153717.3836-1-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Reviewed-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/ctrl.c | 5 +---- drivers/infiniband/hw/irdma/uk.c | 6 ++---- drivers/infiniband/hw/irdma/user.h | 4 ++-- 3 files changed, 5 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 729fa8a3f6f8..7264f8c2f7d5 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -2463,7 +2463,6 @@ static inline void irdma_sc_cq_ack(struct irdma_sc_cq *cq) enum irdma_status_code irdma_sc_cq_init(struct irdma_sc_cq *cq, struct irdma_cq_init_info *info) { - enum irdma_status_code ret_code; u32 pble_obj_cnt; pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; @@ -2475,9 +2474,7 @@ enum irdma_status_code irdma_sc_cq_init(struct irdma_sc_cq *cq, cq->ceq_id = info->ceq_id; info->cq_uk_init_info.cqe_alloc_db = cq->dev->cq_arm_db; info->cq_uk_init_info.cq_ack_db = cq->dev->cq_ack_db; - ret_code = irdma_uk_cq_init(&cq->cq_uk, &info->cq_uk_init_info); - if (ret_code) - return ret_code; + irdma_uk_cq_init(&cq->cq_uk, &info->cq_uk_init_info); cq->virtual_map = info->virtual_map; cq->pbl_chunk_size = info->pbl_chunk_size; diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index ebcd93bb9e9d..cc578974ad0e 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -1446,8 +1446,8 @@ enum irdma_status_code irdma_uk_qp_init(struct irdma_qp_uk *qp, * @cq: hw cq * @info: hw cq initialization info */ -enum irdma_status_code irdma_uk_cq_init(struct irdma_cq_uk *cq, - struct irdma_cq_uk_init_info *info) +void irdma_uk_cq_init(struct irdma_cq_uk *cq, + struct irdma_cq_uk_init_info *info) { cq->cq_base = info->cq_base; cq->cq_id = info->cq_id; @@ -1458,8 +1458,6 @@ enum irdma_status_code irdma_uk_cq_init(struct irdma_cq_uk *cq, cq->avoid_mem_cflct = info->avoid_mem_cflct; IRDMA_RING_INIT(cq->cq_ring, cq->cq_size); cq->polarity = 1; - - return 0; } /** diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index 31d5e4e3f442..66e00660fbaa 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -316,8 +316,8 @@ void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq, enum irdma_cmpl_notify cq_notify); void irdma_uk_cq_resize(struct irdma_cq_uk *cq, void *cq_base, int size); void irdma_uk_cq_set_resized_cnt(struct irdma_cq_uk *qp, u16 cnt); -enum irdma_status_code irdma_uk_cq_init(struct irdma_cq_uk *cq, - struct irdma_cq_uk_init_info *info); +void irdma_uk_cq_init(struct irdma_cq_uk *cq, + struct irdma_cq_uk_init_info *info); enum irdma_status_code irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info); struct irdma_sq_uk_wr_trk_info { -- cgit From 911a81c9c7092bfd75432ce79b2ef879127ea065 Mon Sep 17 00:00:00 2001 From: wangyugui Date: Tue, 19 Oct 2021 08:26:56 +0800 Subject: RDMA/core: Use kvzalloc when allocating the struct ib_port The 'struct attribute' flex array contains some struct lock_class_key's which become big when lockdep is turned on. Big enough that some drivers will not load when CONFIG_PROVE_LOCKING=y because they cannot allocate enough memory: WARNING: CPU: 36 PID: 8 at mm/page_alloc.c:5350 __alloc_pages+0x27e/0x3e0 Call Trace: kmalloc_order+0x2a/0xb0 kmalloc_order_trace+0x19/0xf0 __kmalloc+0x231/0x270 ib_setup_port_attrs+0xd8/0x870 [ib_core] ib_register_device+0x419/0x4e0 [ib_core] bnxt_re_task+0x208/0x2d0 [bnxt_re] Link: https://lore.kernel.org/r/20211019002656.17745-1-wangyugui@e16-tech.com Signed-off-by: wangyugui Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 1bf3aea4b71e..8626dfbf2199 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -757,7 +757,7 @@ static void ib_port_release(struct kobject *kobj) if (port->hw_stats_data) rdma_free_hw_stats_struct(port->hw_stats_data->stats); kfree(port->hw_stats_data); - kfree(port); + kvfree(port); } static void ib_port_gid_attr_release(struct kobject *kobj) @@ -1205,7 +1205,7 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num, struct ib_port *p; int ret; - p = kzalloc(struct_size(p, attrs_list, + p = kvzalloc(struct_size(p, attrs_list, attr->gid_tbl_len + attr->pkey_tbl_len), GFP_KERNEL); if (!p) -- cgit From 97ad8c8c719d0f7fbf3e828be7e99121b4b05b1f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 Oct 2021 08:15:45 +0200 Subject: RDMA/mlx5: fix build error with INFINIBAND_USER_ACCESS=n The mlx5_ib_fs_add_op_fc/mlx5_ib_fs_remove_op_fc functions are only available when user access is enabled, without that we run into a link error: ERROR: modpost: "mlx5_ib_fs_add_op_fc" [drivers/infiniband/hw/mlx5/mlx5_ib.ko] undefined! ERROR: modpost: "mlx5_ib_fs_remove_op_fc" [drivers/infiniband/hw/mlx5/mlx5_ib.ko] undefined! Conditionally compiling the newly added code section makes it build, though this is probably not a correct fix. Fixes: a29b934ceb4c ("RDMA/mlx5: Add modify_op_stat() support") Link: https://lore.kernel.org/r/20211019061602.3062196-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/counters.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 6f1c4b57110e..945758f39523 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -641,9 +641,9 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) if (!dev->port[i].cnts.opfcs[j].fc) continue; - mlx5_ib_fs_remove_op_fc(dev, - &dev->port[i].cnts.opfcs[j], - j); + if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) + mlx5_ib_fs_remove_op_fc(dev, + &dev->port[i].cnts.opfcs[j], j); mlx5_fc_destroy(dev->mdev, dev->port[i].cnts.opfcs[j].fc); dev->port[i].cnts.opfcs[j].fc = NULL; @@ -885,7 +885,8 @@ static const struct ib_device_ops hw_stats_ops = { .counter_dealloc = mlx5_ib_counter_dealloc, .counter_alloc_stats = mlx5_ib_counter_alloc_stats, .counter_update_stats = mlx5_ib_counter_update_stats, - .modify_hw_stat = mlx5_ib_modify_stat, + .modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ? + mlx5_ib_modify_stat : NULL, }; static const struct ib_device_ops hw_switchdev_stats_ops = { -- cgit From 10f7b9bc85ec2421c46588732ea72a58c00d0926 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Oct 2021 11:26:02 -0700 Subject: RDMA/ipoib: Use dev_addr_mod() Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Link: https://lore.kernel.org/r/20211019182604.1441387-2-kuba@kernel.org Signed-off-by: Jakub Kicinski Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4 +++- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 9 ++++----- drivers/infiniband/ulp/ipoib/ipoib_main.c | 17 +++++++++-------- 3 files changed, 16 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 684c2ddb16f5..fd9d7f2c4d64 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1583,6 +1583,7 @@ int ipoib_cm_dev_init(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); int max_srq_sge, i; + u8 addr; INIT_LIST_HEAD(&priv->cm.passive_ids); INIT_LIST_HEAD(&priv->cm.reap_list); @@ -1636,7 +1637,8 @@ int ipoib_cm_dev_init(struct net_device *dev) } } - priv->dev->dev_addr[0] = IPOIB_FLAGS_RC; + addr = IPOIB_FLAGS_RC; + dev_addr_mod(dev, 0, &addr, 1); return 0; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index ceabfb0b0a83..2c3dca41d3bd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1057,13 +1057,11 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) { union ib_gid search_gid; union ib_gid gid0; - union ib_gid *netdev_gid; int err; u16 index; u32 port; bool ret = false; - netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4); if (rdma_query_gid(priv->ca, priv->port, 0, &gid0)) return false; @@ -1073,7 +1071,8 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) * to do it later */ priv->local_gid.global.subnet_prefix = gid0.global.subnet_prefix; - netdev_gid->global.subnet_prefix = gid0.global.subnet_prefix; + dev_addr_mod(priv->dev, 4, (u8 *)&gid0.global.subnet_prefix, + sizeof(gid0.global.subnet_prefix)); search_gid.global.subnet_prefix = gid0.global.subnet_prefix; search_gid.global.interface_id = priv->local_gid.global.interface_id; @@ -1135,8 +1134,8 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) if (!test_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags)) { memcpy(&priv->local_gid, &gid0, sizeof(priv->local_gid)); - memcpy(priv->dev->dev_addr + 4, &gid0, - sizeof(priv->local_gid)); + dev_addr_mod(priv->dev, 4, (u8 *)&gid0, + sizeof(priv->local_gid)); ret = true; } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 9c9da5aa592a..9934b8bd7f56 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1696,6 +1696,7 @@ static void ipoib_dev_uninit_default(struct net_device *dev) static int ipoib_dev_init_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); + u8 addr_mod[3]; ipoib_napi_add(dev); @@ -1723,9 +1724,10 @@ static int ipoib_dev_init_default(struct net_device *dev) } /* after qp created set dev address */ - priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; - priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff; - priv->dev->dev_addr[3] = (priv->qp->qp_num) & 0xff; + addr_mod[0] = (priv->qp->qp_num >> 16) & 0xff; + addr_mod[1] = (priv->qp->qp_num >> 8) & 0xff; + addr_mod[2] = (priv->qp->qp_num) & 0xff; + dev_addr_mod(priv->dev, 1, addr_mod, sizeof(addr_mod)); return 0; @@ -1886,8 +1888,7 @@ static int ipoib_parent_init(struct net_device *ndev) priv->ca->name, priv->port, result); return result; } - memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, - sizeof(union ib_gid)); + dev_addr_mod(priv->dev, 4, priv->local_gid.raw, sizeof(union ib_gid)); SET_NETDEV_DEV(priv->dev, priv->ca->dev.parent); priv->dev->dev_port = priv->port - 1; @@ -1908,8 +1909,8 @@ static void ipoib_child_init(struct net_device *ndev) memcpy(&priv->local_gid, priv->dev->dev_addr + 4, sizeof(priv->local_gid)); else { - memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, - INFINIBAND_ALEN); + __dev_addr_set(priv->dev, ppriv->dev->dev_addr, + INFINIBAND_ALEN); memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid)); } @@ -2326,7 +2327,7 @@ static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid) memcpy(&priv->local_gid.global.interface_id, &gid->global.interface_id, sizeof(gid->global.interface_id)); - memcpy(netdev->dev_addr + 4, &priv->local_gid, sizeof(priv->local_gid)); + dev_addr_mod(netdev, 4, (u8 *)&priv->local_gid, sizeof(priv->local_gid)); clear_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); netif_addr_unlock_bh(netdev); -- cgit From fd92213e9af3b8e5cb3b4d3bf925c9baafb46c9e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Oct 2021 11:26:04 -0700 Subject: RDMA: Constify netdev->dev_addr accesses netdev->dev_addr will become const soon, make sure drivers propagate the qualifier. Link: https://lore.kernel.org/r/20211019182604.1441387-4-kuba@kernel.org Signed-off-by: Jakub Kicinski Reviewed-by: Leon Romanovsky Acked-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_res.c | 2 +- drivers/infiniband/hw/bnxt_re/qplib_res.h | 2 +- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 6 +++--- drivers/infiniband/hw/bnxt_re/qplib_sp.h | 5 +++-- drivers/infiniband/hw/hfi1/ipoib_main.c | 2 +- drivers/infiniband/hw/hns/hns_roce_device.h | 3 ++- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 10 +++++----- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 +++--- drivers/infiniband/hw/hns/hns_roce_main.c | 3 ++- drivers/infiniband/hw/irdma/cm.h | 4 ++-- drivers/infiniband/hw/irdma/hw.c | 7 ++++--- drivers/infiniband/hw/irdma/main.h | 5 +++-- drivers/infiniband/hw/irdma/trace_cm.h | 8 +++++--- drivers/infiniband/hw/irdma/utils.c | 4 ++-- drivers/infiniband/hw/irdma/verbs.c | 2 +- drivers/infiniband/hw/usnic/usnic_fwd.c | 2 +- drivers/infiniband/hw/usnic/usnic_fwd.h | 2 +- 17 files changed, 40 insertions(+), 33 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index bf49363f590e..a1d8a87dc678 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -573,7 +573,7 @@ fail: } /* GUID */ -void bnxt_qplib_get_guid(u8 *dev_addr, u8 *guid) +void bnxt_qplib_get_guid(const u8 *dev_addr, u8 *guid) { u8 mac[ETH_ALEN]; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index c39b20236f16..d2951a713cc8 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -346,7 +346,7 @@ void bnxt_qplib_free_hwq(struct bnxt_qplib_res *res, struct bnxt_qplib_hwq *hwq); int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, struct bnxt_qplib_hwq_attr *hwq_attr); -void bnxt_qplib_get_guid(u8 *dev_addr, u8 *guid); +void bnxt_qplib_get_guid(const u8 *dev_addr, u8 *guid); int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pd_tbl, struct bnxt_qplib_pd *pd); int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index cbe83e9bce5c..379e715ebd30 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -287,8 +287,8 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, } int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, - struct bnxt_qplib_gid *gid, u8 *smac, u16 vlan_id, - bool update, u32 *index) + struct bnxt_qplib_gid *gid, const u8 *smac, + u16 vlan_id, bool update, u32 *index) { struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl, struct bnxt_qplib_res, @@ -379,7 +379,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct bnxt_qplib_gid *gid, u16 gid_idx, - u8 *smac) + const u8 *smac) { struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl, struct bnxt_qplib_res, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 3d5c41841668..a18f568cb23e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -250,10 +250,11 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res, int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct bnxt_qplib_gid *gid, u16 vlan_id, bool update); int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, - struct bnxt_qplib_gid *gid, u8 *mac, u16 vlan_id, + struct bnxt_qplib_gid *gid, const u8 *mac, u16 vlan_id, bool update, u32 *index); int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, - struct bnxt_qplib_gid *gid, u16 gid_idx, u8 *smac); + struct bnxt_qplib_gid *gid, u16 gid_idx, + const u8 *smac); int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res, struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 index, u16 *pkey); diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c index e594a961f513..e1a2b02bbd91 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_main.c +++ b/drivers/infiniband/hw/hfi1/ipoib_main.c @@ -11,7 +11,7 @@ #include "ipoib.h" #include "hfi.h" -static u32 qpn_from_mac(u8 *mac_arr) +static u32 qpn_from_mac(const u8 *mac_arr) { return (u32)mac_arr[1] << 16 | mac_arr[2] << 8 | mac_arr[3]; } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 9467c39e3d28..e5dadcd118ac 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -898,7 +898,8 @@ struct hns_roce_hw { bool (*chk_mbox_avail)(struct hns_roce_dev *hr_dev, bool *is_busy); int (*set_gid)(struct hns_roce_dev *hr_dev, u32 port, int gid_index, const union ib_gid *gid, const struct ib_gid_attr *attr); - int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr); + int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, + const u8 *addr); void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port, enum ib_mtu mtu); int (*write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index e0f59b8d7d5d..f4af3992ba95 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -90,11 +90,11 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, unsigned long flags = 0; void *wqe = NULL; __le32 doorbell[2]; + const u8 *smac; int ret = 0; int loopback; u32 wqe_idx; int nreq; - u8 *smac; if (unlikely(ibqp->qp_type != IB_QPT_GSI && ibqp->qp_type != IB_QPT_RC)) { @@ -154,7 +154,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, UD_SEND_WQE_U32_8_DMAC_5_S, ah->av.mac[5]); - smac = (u8 *)hr_dev->dev_addr[qp->port]; + smac = (const u8 *)hr_dev->dev_addr[qp->port]; loopback = ether_addr_equal_unaligned(ah->av.mac, smac) ? 1 : 0; roce_set_bit(ud_sq_wqe->u32_8, @@ -1782,7 +1782,7 @@ static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u32 port, } static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, - u8 *addr) + const u8 *addr) { u32 reg_smac_l; u16 reg_smac_h; @@ -2743,12 +2743,12 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, __le32 doorbell[2] = {0}; u64 *mtts_2 = NULL; int ret = -EINVAL; + const u8 *smac; u64 sq_ba = 0; u64 rq_ba = 0; u32 port; u32 port_num; u8 *dmac; - u8 *smac; if (!check_qp_state(cur_state, new_state)) { ibdev_err(ibqp->device, @@ -2947,7 +2947,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : hr_qp->port; - smac = (u8 *)hr_dev->dev_addr[port]; + smac = (const u8 *)hr_dev->dev_addr[port]; /* when dmac equals smac or loop_idc is 1, it should loopback */ if (ether_addr_equal_unaligned(dmac, smac) || hr_dev->loop_idc == 0x1) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 91990fad7185..c2916b170a41 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2982,7 +2982,7 @@ static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u32 port, } static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, - u8 *addr) + const u8 *addr) { struct hns_roce_cmq_desc desc; struct hns_roce_cfg_smac_tb *smac_tb = @@ -4308,10 +4308,10 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, dma_addr_t trrl_ba; dma_addr_t irrl_ba; enum ib_mtu ib_mtu; + const u8 *smac; u8 lp_pktn_ini; u64 *mtts; u8 *dmac; - u8 *smac; u32 port; int mtu; int ret; @@ -4364,7 +4364,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : hr_qp->port; - smac = (u8 *)hr_dev->dev_addr[port]; + smac = (const u8 *)hr_dev->dev_addr[port]; dmac = (u8 *)attr->ah_attr.roce.dmac; /* when dmac equals smac or loop_idc is 1, it should loopback */ if (ether_addr_equal_unaligned(dmac, smac) || diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 5d39bd08582a..b3595b6079b5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -42,7 +42,8 @@ #include "hns_roce_device.h" #include "hns_roce_hem.h" -static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port, u8 *addr) +static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port, + const u8 *addr) { u8 phy_port; u32 i; diff --git a/drivers/infiniband/hw/irdma/cm.h b/drivers/infiniband/hw/irdma/cm.h index 1fbe72e18625..3bf42728e9b7 100644 --- a/drivers/infiniband/hw/irdma/cm.h +++ b/drivers/infiniband/hw/irdma/cm.h @@ -389,7 +389,7 @@ int irdma_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int irdma_create_listen(struct iw_cm_id *cm_id, int backlog); int irdma_destroy_listen(struct iw_cm_id *cm_id); -int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, bool ipv4, u8 *mac); +int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, bool ipv4, const u8 *mac); void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr, struct irdma_cm_info *nfo, bool disconnect_all); @@ -398,7 +398,7 @@ int irdma_cm_stop(struct irdma_device *dev); bool irdma_ipv4_is_lpb(u32 loc_addr, u32 rem_addr); bool irdma_ipv6_is_lpb(u32 *loc_addr, u32 *rem_addr); int irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, bool ipv4, - u8 *mac_addr, u32 action); + const u8 *mac_addr, u32 action); void irdma_if_notify(struct irdma_device *iwdev, struct net_device *netdev, u32 *ipaddr, bool ipv4, bool ifup); bool irdma_port_in_use(struct irdma_cm_core *cm_core, u16 port); diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 7de525a5ccf8..4108dcabece2 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -1057,7 +1057,7 @@ static enum irdma_status_code irdma_alloc_set_mac(struct irdma_device *iwdev) &iwdev->mac_ip_table_idx); if (!status) { status = irdma_add_local_mac_entry(iwdev->rf, - (u8 *)iwdev->netdev->dev_addr, + (const u8 *)iwdev->netdev->dev_addr, (u8)iwdev->mac_ip_table_idx); if (status) irdma_del_local_mac_entry(iwdev->rf, @@ -2191,7 +2191,7 @@ void irdma_del_local_mac_entry(struct irdma_pci_f *rf, u16 idx) * @mac_addr: pointer to mac address * @idx: the index of the mac ip address to add */ -int irdma_add_local_mac_entry(struct irdma_pci_f *rf, u8 *mac_addr, u16 idx) +int irdma_add_local_mac_entry(struct irdma_pci_f *rf, const u8 *mac_addr, u16 idx) { struct irdma_local_mac_entry_info *info; struct irdma_cqp *iwcqp = &rf->cqp; @@ -2362,7 +2362,8 @@ void irdma_del_apbvt(struct irdma_device *iwdev, * @ipv4: flag inicating IPv4 * @action: add, delete or modify */ -void irdma_manage_arp_cache(struct irdma_pci_f *rf, unsigned char *mac_addr, +void irdma_manage_arp_cache(struct irdma_pci_f *rf, + const unsigned char *mac_addr, u32 *ip_addr, bool ipv4, u32 action) { struct irdma_add_arp_cache_entry_info *info; diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index b678fe712447..91a497139ba3 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -467,7 +467,8 @@ void irdma_qp_rem_ref(struct ib_qp *ibqp); void irdma_free_lsmm_rsrc(struct irdma_qp *iwqp); struct ib_qp *irdma_get_qp(struct ib_device *ibdev, int qpn); void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask); -void irdma_manage_arp_cache(struct irdma_pci_f *rf, unsigned char *mac_addr, +void irdma_manage_arp_cache(struct irdma_pci_f *rf, + const unsigned char *mac_addr, u32 *ip_addr, bool ipv4, u32 action); struct irdma_apbvt_entry *irdma_add_apbvt(struct irdma_device *iwdev, u16 port); void irdma_del_apbvt(struct irdma_device *iwdev, @@ -479,7 +480,7 @@ void irdma_free_cqp_request(struct irdma_cqp *cqp, void irdma_put_cqp_request(struct irdma_cqp *cqp, struct irdma_cqp_request *cqp_request); int irdma_alloc_local_mac_entry(struct irdma_pci_f *rf, u16 *mac_tbl_idx); -int irdma_add_local_mac_entry(struct irdma_pci_f *rf, u8 *mac_addr, u16 idx); +int irdma_add_local_mac_entry(struct irdma_pci_f *rf, const u8 *mac_addr, u16 idx); void irdma_del_local_mac_entry(struct irdma_pci_f *rf, u16 idx); u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf); diff --git a/drivers/infiniband/hw/irdma/trace_cm.h b/drivers/infiniband/hw/irdma/trace_cm.h index bcf10ec427d6..f633fb343328 100644 --- a/drivers/infiniband/hw/irdma/trace_cm.h +++ b/drivers/infiniband/hw/irdma/trace_cm.h @@ -144,7 +144,7 @@ DEFINE_EVENT(tos_template, irdma_dcb_tos, DECLARE_EVENT_CLASS(qhash_template, TP_PROTO(struct irdma_device *iwdev, struct irdma_cm_listener *listener, - char *dev_addr), + const char *dev_addr), TP_ARGS(iwdev, listener, dev_addr), TP_STRUCT__entry(__field(struct irdma_device *, iwdev) __field(u16, lport) @@ -173,12 +173,14 @@ DECLARE_EVENT_CLASS(qhash_template, DEFINE_EVENT(qhash_template, irdma_add_mqh_6, TP_PROTO(struct irdma_device *iwdev, - struct irdma_cm_listener *listener, char *dev_addr), + struct irdma_cm_listener *listener, + const char *dev_addr), TP_ARGS(iwdev, listener, dev_addr)); DEFINE_EVENT(qhash_template, irdma_add_mqh_4, TP_PROTO(struct irdma_device *iwdev, - struct irdma_cm_listener *listener, char *dev_addr), + struct irdma_cm_listener *listener, + const char *dev_addr), TP_ARGS(iwdev, listener, dev_addr)); TRACE_EVENT(irdma_addr_resolve, diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 0ebce57e8756..8b42c43fc14f 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -11,7 +11,7 @@ * @action: modify, delete or add */ int irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, bool ipv4, - u8 *mac_addr, u32 action) + const u8 *mac_addr, u32 action) { unsigned long flags; int arp_index; @@ -77,7 +77,7 @@ int irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, bool ipv4, * @ipv4: IPv4 flag * @mac: MAC address */ -int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, bool ipv4, u8 *mac) +int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, bool ipv4, const u8 *mac) { int arpidx; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 6a544c253603..aa38ace15c07 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -4326,7 +4326,7 @@ static enum rdma_link_layer irdma_get_link_layer(struct ib_device *ibdev, static __be64 irdma_mac_to_guid(struct net_device *ndev) { - unsigned char *mac = ndev->dev_addr; + const unsigned char *mac = ndev->dev_addr; __be64 guid; unsigned char *dst = (unsigned char *)&guid; diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c index 398c4c00b932..18a70850b738 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.c +++ b/drivers/infiniband/hw/usnic/usnic_fwd.c @@ -103,7 +103,7 @@ void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev) kfree(ufdev); } -void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]) +void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, const char mac[ETH_ALEN]) { spin_lock(&ufdev->lock); memcpy(&ufdev->mac, mac, sizeof(ufdev->mac)); diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.h b/drivers/infiniband/hw/usnic/usnic_fwd.h index f0b71d593da5..a91200886922 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.h +++ b/drivers/infiniband/hw/usnic/usnic_fwd.h @@ -74,7 +74,7 @@ struct usnic_filter_action { struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev); void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev); -void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]); +void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, const char mac[ETH_ALEN]); void usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr); void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev); void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev); -- cgit From 86479f8a3fc72df53bb07fe2f26acbd249baebfe Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 21 Oct 2021 19:06:12 -0400 Subject: RDMA/irdma: Remove the unused spin lock in struct irdma_qp_uk The spin lock in struct irdma_qp_uk is not used. So remove it. Link: https://lore.kernel.org/r/20211021230612.153812-1-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Acked-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/user.h | 1 - drivers/infiniband/hw/irdma/verbs.c | 1 - 2 files changed, 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index 66e00660fbaa..cba1a2da938d 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -367,7 +367,6 @@ struct irdma_qp_uk { bool rq_flush_complete:1; /* Indicates flush was seen and RQ was empty after the flush */ bool destroy_pending:1; /* Indicates the QP is being destroyed */ void *back_qp; - spinlock_t *lock; u8 dbg_rq_flushed; u8 sq_flush_seen; u8 rq_flush_seen; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index aa38ace15c07..273d2a7b43d9 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -833,7 +833,6 @@ static int irdma_create_qp(struct ib_qp *ibqp, qp = &iwqp->sc_qp; qp->qp_uk.back_qp = iwqp; - qp->qp_uk.lock = &iwqp->lock; qp->push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX; iwqp->iwdev = iwdev; -- cgit From e058953c0ed18b5d2e125edec1cb6a258aa52446 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 25 Oct 2021 09:26:32 +0300 Subject: RDMA/qedr: Remove unsupported qedr_resize_cq callback There is no need to return always zero for function which is not supported, especially since 0 is the wrong return code. Fixes: a7efd7773e31 ("qedr: Add support for PD,PKEY and CQ verbs") Link: https://lore.kernel.org/r/20211025062632.3960-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/main.c | 1 - drivers/infiniband/hw/qedr/verbs.c | 10 ---------- drivers/infiniband/hw/qedr/verbs.h | 1 - 3 files changed, 12 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 755930be01b8..8b5291d424d2 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -228,7 +228,6 @@ static const struct ib_device_ops qedr_dev_ops = { .query_srq = qedr_query_srq, .reg_user_mr = qedr_reg_user_mr, .req_notify_cq = qedr_arm_cq, - .resize_cq = qedr_resize_cq, INIT_RDMA_OBJ_SIZE(ib_ah, qedr_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_cq, qedr_cq, ibcq), diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 3fbf172dbbef..ce0e24fa14c5 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1052,16 +1052,6 @@ err0: return -EINVAL; } -int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata) -{ - struct qedr_dev *dev = get_qedr_dev(ibcq->device); - struct qedr_cq *cq = get_qedr_cq(ibcq); - - DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq); - - return 0; -} - #define QEDR_DESTROY_CQ_MAX_ITERATIONS (10) #define QEDR_DESTROY_CQ_ITER_DURATION (10) diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 031687dafc61..081753df79ef 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -53,7 +53,6 @@ int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata); int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata); int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int qedr_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs, -- cgit From 1e4df4a21c5ac722df1099eee30cad9246c889b5 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 12 Oct 2021 15:09:02 +0300 Subject: RDMA/umem: Allow pinned dmabuf umem usage Introduce ib_umem_dmabuf_get_pinned() which allows the driver to get a dmabuf umem which is pinned and does not require move_notify callback implementation. The returned umem is pinned and DMA mapped like standard cpu umems, and is released through ib_umem_release() (incl. unpinning and unmapping). Link: https://lore.kernel.org/r/20211012120903.96933-3-galpress@amazon.com Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem_dmabuf.c | 51 +++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c index e824baf4640d..372c21cd2927 100644 --- a/drivers/infiniband/core/umem_dmabuf.c +++ b/drivers/infiniband/core/umem_dmabuf.c @@ -163,12 +163,63 @@ out_release_dmabuf: } EXPORT_SYMBOL(ib_umem_dmabuf_get); +static void +ib_umem_dmabuf_unsupported_move_notify(struct dma_buf_attachment *attach) +{ + struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv; + + ibdev_warn_ratelimited(umem_dmabuf->umem.ibdev, + "Invalidate callback should not be called when memory is pinned\n"); +} + +static struct dma_buf_attach_ops ib_umem_dmabuf_attach_pinned_ops = { + .allow_peer2peer = true, + .move_notify = ib_umem_dmabuf_unsupported_move_notify, +}; + +struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device, + unsigned long offset, + size_t size, int fd, + int access) +{ + struct ib_umem_dmabuf *umem_dmabuf; + int err; + + umem_dmabuf = ib_umem_dmabuf_get(device, offset, size, fd, access, + &ib_umem_dmabuf_attach_pinned_ops); + if (IS_ERR(umem_dmabuf)) + return umem_dmabuf; + + dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL); + err = dma_buf_pin(umem_dmabuf->attach); + if (err) + goto err_release; + umem_dmabuf->pinned = 1; + + err = ib_umem_dmabuf_map_pages(umem_dmabuf); + if (err) + goto err_unpin; + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + + return umem_dmabuf; + +err_unpin: + dma_buf_unpin(umem_dmabuf->attach); +err_release: + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + ib_umem_release(&umem_dmabuf->umem); + return ERR_PTR(err); +} +EXPORT_SYMBOL(ib_umem_dmabuf_get_pinned); + void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) { struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf; dma_resv_lock(dmabuf->resv, NULL); ib_umem_dmabuf_unmap_pages(umem_dmabuf); + if (umem_dmabuf->pinned) + dma_buf_unpin(umem_dmabuf->attach); dma_resv_unlock(dmabuf->resv); dma_buf_detach(dmabuf, umem_dmabuf->attach); -- cgit From 66f4817b5712eb8783916dc6c2005aac7e5e6928 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 12 Oct 2021 15:09:03 +0300 Subject: RDMA/efa: Add support for dmabuf memory regions Implement a dmabuf importer for the EFA driver. As ODP is not supported, the pinned dmabuf are used to prevent the move_notify callback from being called. Link: https://lore.kernel.org/r/20211012120903.96933-4-galpress@amazon.com Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa.h | 4 ++ drivers/infiniband/hw/efa/efa_main.c | 1 + drivers/infiniband/hw/efa/efa_verbs.c | 127 +++++++++++++++++++++++++--------- 3 files changed, 101 insertions(+), 31 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index e0c9ba4c70f4..7352a1f5d811 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -154,6 +154,10 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); +struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, + u64 length, u64 virt_addr, + int fd, int access_flags, + struct ib_udata *udata); int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); int efa_get_port_immutable(struct ib_device *ibdev, u32 port_num, struct ib_port_immutable *immutable); diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index d2a445f6f8e5..94b94cca4870 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -380,6 +380,7 @@ static const struct ib_device_ops efa_dev_ops = { .query_port = efa_query_port, .query_qp = efa_query_qp, .reg_user_mr = efa_reg_mr, + .reg_user_mr_dmabuf = efa_reg_user_mr_dmabuf, INIT_RDMA_OBJ_SIZE(ib_ah, efa_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_cq, efa_cq, ibcq), diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 8335a6f44869..ecfe70eb5efb 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -3,6 +3,8 @@ * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ +#include +#include #include #include @@ -1544,26 +1546,18 @@ static int efa_create_pbl(struct efa_dev *dev, return 0; } -struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, - u64 virt_addr, int access_flags, - struct ib_udata *udata) +static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags, + struct ib_udata *udata) { struct efa_dev *dev = to_edev(ibpd->device); - struct efa_com_reg_mr_params params = {}; - struct efa_com_reg_mr_result result = {}; - struct pbl_context pbl; int supp_access_flags; - unsigned int pg_sz; struct efa_mr *mr; - int inline_size; - int err; if (udata && udata->inlen && !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) { ibdev_dbg(&dev->ibdev, "Incompatible ABI params, udata not cleared\n"); - err = -EINVAL; - goto err_out; + return ERR_PTR(-EINVAL); } supp_access_flags = @@ -1575,23 +1569,26 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, ibdev_dbg(&dev->ibdev, "Unsupported access flags[%#x], supported[%#x]\n", access_flags, supp_access_flags); - err = -EOPNOTSUPP; - goto err_out; + return ERR_PTR(-EOPNOTSUPP); } mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) { - err = -ENOMEM; - goto err_out; - } + if (!mr) + return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(ibpd->device, start, length, access_flags); - if (IS_ERR(mr->umem)) { - err = PTR_ERR(mr->umem); - ibdev_dbg(&dev->ibdev, - "Failed to pin and map user space memory[%d]\n", err); - goto err_free; - } + return mr; +} + +static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start, + u64 length, u64 virt_addr, int access_flags) +{ + struct efa_dev *dev = to_edev(ibpd->device); + struct efa_com_reg_mr_params params = {}; + struct efa_com_reg_mr_result result = {}; + struct pbl_context pbl; + unsigned int pg_sz; + int inline_size; + int err; params.pd = to_epd(ibpd)->pdn; params.iova = virt_addr; @@ -1602,10 +1599,9 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, dev->dev_attr.page_size_cap, virt_addr); if (!pg_sz) { - err = -EOPNOTSUPP; ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n", dev->dev_attr.page_size_cap); - goto err_unmap; + return -EOPNOTSUPP; } params.page_shift = order_base_2(pg_sz); @@ -1619,21 +1615,21 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, if (params.page_num <= inline_size) { err = efa_create_inline_pbl(dev, mr, ¶ms); if (err) - goto err_unmap; + return err; err = efa_com_register_mr(&dev->edev, ¶ms, &result); if (err) - goto err_unmap; + return err; } else { err = efa_create_pbl(dev, &pbl, mr, ¶ms); if (err) - goto err_unmap; + return err; err = efa_com_register_mr(&dev->edev, ¶ms, &result); pbl_destroy(dev, &pbl); if (err) - goto err_unmap; + return err; } mr->ibmr.lkey = result.l_key; @@ -1641,9 +1637,78 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, mr->ibmr.length = length; ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey); + return 0; +} + +struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, + u64 length, u64 virt_addr, + int fd, int access_flags, + struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibpd->device); + struct ib_umem_dmabuf *umem_dmabuf; + struct efa_mr *mr; + int err; + + mr = efa_alloc_mr(ibpd, access_flags, udata); + if (IS_ERR(mr)) { + err = PTR_ERR(mr); + goto err_out; + } + + umem_dmabuf = ib_umem_dmabuf_get_pinned(ibpd->device, start, length, fd, + access_flags); + if (IS_ERR(umem_dmabuf)) { + err = PTR_ERR(umem_dmabuf); + ibdev_dbg(&dev->ibdev, "Failed to get dmabuf umem[%d]\n", err); + goto err_free; + } + + mr->umem = &umem_dmabuf->umem; + err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags); + if (err) + goto err_release; + + return &mr->ibmr; + +err_release: + ib_umem_release(mr->umem); +err_free: + kfree(mr); +err_out: + atomic64_inc(&dev->stats.reg_mr_err); + return ERR_PTR(err); +} + +struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibpd->device); + struct efa_mr *mr; + int err; + + mr = efa_alloc_mr(ibpd, access_flags, udata); + if (IS_ERR(mr)) { + err = PTR_ERR(mr); + goto err_out; + } + + mr->umem = ib_umem_get(ibpd->device, start, length, access_flags); + if (IS_ERR(mr->umem)) { + err = PTR_ERR(mr->umem); + ibdev_dbg(&dev->ibdev, + "Failed to pin and map user space memory[%d]\n", err); + goto err_free; + } + + err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags); + if (err) + goto err_release; + return &mr->ibmr; -err_unmap: +err_release: ib_umem_release(mr->umem); err_free: kfree(mr); -- cgit From 067113d9db66f8a6cde6d9105404731b3b275202 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Tue, 26 Oct 2021 11:43:03 +0300 Subject: RDMA/core: Fix missed initialization of rdma_hw_stats::lock alloc_and_bind() creates a new rdma_hw_stats structure but misses initializing the mutex lock. This causes debug kernel failures: DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 4 PID: 64464 at kernel/locking/mutex.c:575 __mutex_lock+0x9c3/0x12b0 Call Trace: fill_res_counter_entry+0x6ee/0x1020 [ib_core] res_get_common_dumpit+0x907/0x10a0 [ib_core] nldev_stat_get_dumpit+0x20a/0x290 [ib_core] netlink_dump+0x451/0x1040 __netlink_dump_start+0x583/0x830 rdma_nl_rcv_msg+0x3f3/0x7c0 [ib_core] rdma_nl_rcv+0x264/0x410 [ib_core] netlink_unicast+0x433/0x700 netlink_sendmsg+0x707/0xbf0 sock_sendmsg+0xb0/0xe0 __sys_sendto+0x193/0x240 __x64_sys_sendto+0xdd/0x1b0 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae Instead of requiring all users to open code initialization of the lock put it in the general rdma_alloc_hw_stats_struct() function and remove duplicates. Fixes: c4ffee7c9bdb ("RDMA/netlink: Implement counter dumpit calback") Link: https://lore.kernel.org/r/4a22986c4685058d2c735d91703ee7d865815bb9.1635237668.git.leonro@nvidia.com Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sysfs.c | 2 -- drivers/infiniband/core/verbs.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 8626dfbf2199..a3f84b50c46a 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -911,7 +911,6 @@ alloc_hw_stats_device(struct ib_device *ibdev) if (!data->group.attrs) goto err_free_data; - mutex_init(&stats->lock); data->group.name = "hw_counters"; data->stats = stats; return data; @@ -1018,7 +1017,6 @@ alloc_hw_stats_port(struct ib_port *port, struct attribute_group *group) if (!group->attrs) goto err_free_data; - mutex_init(&stats->lock); group->name = "hw_counters"; data->stats = stats; return data; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 47cf273d0678..692d5ff657df 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -3002,6 +3002,7 @@ struct rdma_hw_stats *rdma_alloc_hw_stats_struct( stats->descs = descs; stats->num_counters = num_counters; stats->lifespan = msecs_to_jiffies(lifespan); + mutex_init(&stats->lock); return stats; -- cgit From 50604757e7925161356746c181c1a5e11a2b0f4b Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Wed, 27 Oct 2021 13:54:57 -0400 Subject: RDMA/irdma: Remove the unused variable local_qp Since the member variable local_qp is not used, remove it. Link: https://lore.kernel.org/r/20211027175457.201822-1-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Acked-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/type.h | 1 - drivers/infiniband/hw/irdma/verbs.c | 1 - 2 files changed, 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 4312f2070534..9483bb3e10ea 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -852,7 +852,6 @@ struct irdma_roce_offload_info { u16 err_rq_idx; u32 qkey; u32 dest_qp; - u32 local_qp; u8 roce_tver; u8 ack_credits; u8 err_rq_idx_valid; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 273d2a7b43d9..2c78e34c7cd0 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1197,7 +1197,6 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, av->attrs = attr->ah_attr; rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid); rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &attr->ah_attr.grh.dgid); - roce_info->local_qp = ibqp->qp_num; if (av->sgid_addr.saddr.sa_family == AF_INET6) { __be32 *daddr = av->dgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32; -- cgit From 69d1ed59999c1e9c823f78befd3fb8a22bdbff48 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 24 Oct 2021 18:43:30 +0200 Subject: RDMA/rxe: Save a few bytes from struct rxe_pool 'table_size' is never read, it can be removed. In fact, the only place that uses something that could be 'table_size' is 'alloc_index()'. In this function, it is re-computed from 'min_index' and 'max_index'. Link: https://lore.kernel.org/r/2c42065049bb2b99bededdc423a9babf4a98adee.1635093628.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_pool.c | 1 - drivers/infiniband/sw/rxe/rxe_pool.h | 1 - 2 files changed, 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 7b4cb46edfd9..271d4ac0e0aa 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -114,7 +114,6 @@ static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) goto out; } - pool->index.table_size = size; bitmap_zero(pool->index.table, max - min + 1); out: diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 1feca1bffced..1ff2250edf6d 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -74,7 +74,6 @@ struct rxe_pool { struct { struct rb_root tree; unsigned long *table; - size_t table_size; u32 last; u32 max_index; u32 min_index; -- cgit From e30bb300a401a65b985c6af1f799080c80e1b950 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 24 Oct 2021 18:43:56 +0200 Subject: RDMA/rxe: Use 'bitmap_zalloc()' when applicable 'index.table' is a bitmap. So use 'bitmap_zalloc()' to simplify code, improve the semantic and avoid some open-coded arithmetic in allocator arguments. Using 'bitmap_zalloc()' also allows the removal of a now useless 'bitmap_zero()'. Also change the corresponding 'kfree()' into 'bitmap_free()' to keep consistency. Link: https://lore.kernel.org/r/4a3e11d45865678d570333d1962820eb13168848.1635093628.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_pool.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 271d4ac0e0aa..ed2427369c2c 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -96,7 +96,6 @@ static inline const char *pool_name(struct rxe_pool *pool) static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) { int err = 0; - size_t size; if ((max - min + 1) < pool->max_elem) { pr_warn("not enough indices for max_elem\n"); @@ -107,15 +106,12 @@ static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) pool->index.max_index = max; pool->index.min_index = min; - size = BITS_TO_LONGS(max - min + 1) * sizeof(long); - pool->index.table = kmalloc(size, GFP_KERNEL); + pool->index.table = bitmap_zalloc(max - min + 1, GFP_KERNEL); if (!pool->index.table) { err = -ENOMEM; goto out; } - bitmap_zero(pool->index.table, max - min + 1); - out: return err; } @@ -167,7 +163,7 @@ void rxe_pool_cleanup(struct rxe_pool *pool) pr_warn("%s pool destroyed with unfree'd elem\n", pool_name(pool)); - kfree(pool->index.table); + bitmap_free(pool->index.table); } static u32 alloc_index(struct rxe_pool *pool) -- cgit From 000b8490ecacb3ad5145314ca235c0a17d0187bf Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 24 Oct 2021 10:13:14 -0700 Subject: RDMA/rxe: Make rxe_type_info static const Make struct rxe_type_info static const and local to the only uses. Moves a bit of data to text. $ size drivers/infiniband/sw/rxe/rxe_pool.o* (defconfig w/ infiniband swe) text data bss dec hex filename 4456 12 0 4468 1174 drivers/infiniband/sw/rxe/rxe_pool.o.new 3817 652 0 4469 1175 drivers/infiniband/sw/rxe/rxe_pool.o.old Link: https://lore.kernel.org/r/166b715d71f98336e8ecab72b0dbdd266eee9193.camel@perches.com Signed-off-by: Joe Perches Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_pool.c | 28 ++++++++++++++++++---------- drivers/infiniband/sw/rxe/rxe_pool.h | 14 -------------- 2 files changed, 18 insertions(+), 24 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index ed2427369c2c..2e80bb6aa957 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -7,9 +7,17 @@ #include "rxe.h" #include "rxe_loc.h" -/* info about object pools - */ -struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { +static const struct rxe_type_info { + const char *name; + size_t size; + size_t elem_offset; + void (*cleanup)(struct rxe_pool_entry *obj); + enum rxe_pool_flags flags; + u32 min_index; + u32 max_index; + size_t key_offset; + size_t key_size; +} rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_UC] = { .name = "rxe-uc", .size = sizeof(struct rxe_ucontext), @@ -60,8 +68,8 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { .elem_offset = offsetof(struct rxe_mr, pelem), .cleanup = rxe_mr_cleanup, .flags = RXE_POOL_INDEX, - .max_index = RXE_MAX_MR_INDEX, .min_index = RXE_MIN_MR_INDEX, + .max_index = RXE_MAX_MR_INDEX, }, [RXE_TYPE_MW] = { .name = "rxe-mw", @@ -69,8 +77,8 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { .elem_offset = offsetof(struct rxe_mw, pelem), .cleanup = rxe_mw_cleanup, .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, - .max_index = RXE_MAX_MW_INDEX, .min_index = RXE_MIN_MW_INDEX, + .max_index = RXE_MAX_MW_INDEX, }, [RXE_TYPE_MC_GRP] = { .name = "rxe-mc_grp", @@ -324,7 +332,7 @@ void __rxe_drop_index(struct rxe_pool_entry *elem) void *rxe_alloc_locked(struct rxe_pool *pool) { - struct rxe_type_info *info = &rxe_type_info[pool->type]; + const struct rxe_type_info *info = &rxe_type_info[pool->type]; struct rxe_pool_entry *elem; u8 *obj; @@ -349,7 +357,7 @@ out_cnt: void *rxe_alloc(struct rxe_pool *pool) { - struct rxe_type_info *info = &rxe_type_info[pool->type]; + const struct rxe_type_info *info = &rxe_type_info[pool->type]; struct rxe_pool_entry *elem; u8 *obj; @@ -392,7 +400,7 @@ void rxe_elem_release(struct kref *kref) struct rxe_pool_entry *elem = container_of(kref, struct rxe_pool_entry, ref_cnt); struct rxe_pool *pool = elem->pool; - struct rxe_type_info *info = &rxe_type_info[pool->type]; + const struct rxe_type_info *info = &rxe_type_info[pool->type]; u8 *obj; if (pool->cleanup) @@ -408,7 +416,7 @@ void rxe_elem_release(struct kref *kref) void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index) { - struct rxe_type_info *info = &rxe_type_info[pool->type]; + const struct rxe_type_info *info = &rxe_type_info[pool->type]; struct rb_node *node; struct rxe_pool_entry *elem; u8 *obj; @@ -450,7 +458,7 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key) { - struct rxe_type_info *info = &rxe_type_info[pool->type]; + const struct rxe_type_info *info = &rxe_type_info[pool->type]; struct rb_node *node; struct rxe_pool_entry *elem; u8 *obj; diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 1ff2250edf6d..8ecd9f870aea 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -32,20 +32,6 @@ enum rxe_elem_type { struct rxe_pool_entry; -struct rxe_type_info { - const char *name; - size_t size; - size_t elem_offset; - void (*cleanup)(struct rxe_pool_entry *obj); - enum rxe_pool_flags flags; - u32 max_index; - u32 min_index; - size_t key_offset; - size_t key_size; -}; - -extern struct rxe_type_info rxe_type_info[]; - struct rxe_pool_entry { struct rxe_pool *pool; struct kref ref_cnt; -- cgit From 571fb4fb78a3bf0fcadbe65eca9ca4ccee885af4 Mon Sep 17 00:00:00 2001 From: Haoyue Xu Date: Fri, 29 Oct 2021 17:58:46 +0800 Subject: RDMA/hns: Fix initial arm_st of CQ We set the init CQ status to ARMED before. As a result, an unexpected CEQE would be reported. Therefore, the init CQ status should be set to no_armed rather than REG_NXT_CEQE. Fixes: a5073d6054f7 ("RDMA/hns: Add eq support of hip08") Link: https://lore.kernel.org/r/20211029095846.26732-1-liangwenpeng@huawei.com Signed-off-by: Haoyue Xu Signed-off-by: Wenpeng Liang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index d5f3faa1627a..8e5f0862896e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3328,7 +3328,7 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, memset(cq_context, 0, sizeof(*cq_context)); hr_reg_write(cq_context, CQC_CQ_ST, V2_CQ_STATE_VALID); - hr_reg_write(cq_context, CQC_ARM_ST, REG_NXT_CEQE); + hr_reg_write(cq_context, CQC_ARM_ST, NO_ARMED); hr_reg_write(cq_context, CQC_SHIFT, ilog2(hr_cq->cq_depth)); hr_reg_write(cq_context, CQC_CEQN, hr_cq->vector); hr_reg_write(cq_context, CQC_CQN, hr_cq->cqn); -- cgit From 0e60778efb072d47efc7100c4009b5bd97273b0b Mon Sep 17 00:00:00 2001 From: Yixing Liu Date: Fri, 29 Oct 2021 18:05:37 +0800 Subject: RDMA/hns: Modify the value of MAX_LP_MSG_LEN to meet hardware compatibility The upper limit of MAX_LP_MSG_LEN on HIP08 is 64K, and the upper limit on HIP09 is 16K. Regardless of whether it is HIP08 or HIP09, only 16K will be used. In order to ensure compatibility, it is unified to 16K. Setting MAX_LP_MSG_LEN to 16K will not cause performance loss on HIP08. Fixes: fbed9d2be292 ("RDMA/hns: Fix configuration of ack_req_freq in QPC") Link: https://lore.kernel.org/r/20211029100537.27299-1-liangwenpeng@huawei.com Signed-off-by: Yixing Liu Signed-off-by: Wenpeng Liang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8e5f0862896e..a9c6ffef9640 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4399,8 +4399,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, mtu = ib_mtu_enum_to_int(ib_mtu); if (WARN_ON(mtu <= 0)) return -EINVAL; -#define MAX_LP_MSG_LEN 65536 - /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 64KB */ +#define MAX_LP_MSG_LEN 16384 + /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 16KB */ lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / mtu); if (WARN_ON(lp_pktn_ini >= 0xF)) return -EINVAL; -- cgit From 4f960393a0ee9a39469ceb7c8077ae8db665cc12 Mon Sep 17 00:00:00 2001 From: Alok Prasad Date: Wed, 27 Oct 2021 18:43:29 +0000 Subject: RDMA/qedr: Fix NULL deref for query_qp on the GSI QP This patch fixes a crash caused by querying the QP via netlink, and corrects the state of GSI qp. GSI qp's have a NULL qed_qp. The call trace is generated by: $ rdma res show BUG: kernel NULL pointer dereference, address: 0000000000000034 Hardware name: Dell Inc. PowerEdge R720/0M1GCR, BIOS 1.2.6 05/10/2012 RIP: 0010:qed_rdma_query_qp+0x33/0x1a0 [qed] RSP: 0018:ffffba560a08f580 EFLAGS: 00010206 RAX: 0000000200000000 RBX: ffffba560a08f5b8 RCX: 0000000000000000 RDX: ffffba560a08f5b8 RSI: 0000000000000000 RDI: ffff9807ee458090 RBP: ffffba560a08f5a0 R08: 0000000000000000 R09: ffff9807890e7048 R10: ffffba560a08f658 R11: 0000000000000000 R12: 0000000000000000 R13: ffff9807ee458090 R14: ffff9807f0afb000 R15: ffffba560a08f7ec FS: 00007fbbf8bfe740(0000) GS:ffff980aafa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000034 CR3: 00000001720ba001 CR4: 00000000000606f0 Call Trace: qedr_query_qp+0x82/0x360 [qedr] ib_query_qp+0x34/0x40 [ib_core] ? ib_query_qp+0x34/0x40 [ib_core] fill_res_qp_entry_query.isra.26+0x47/0x1d0 [ib_core] ? __nla_put+0x20/0x30 ? nla_put+0x33/0x40 fill_res_qp_entry+0xe3/0x120 [ib_core] res_get_common_dumpit+0x3f8/0x5d0 [ib_core] ? fill_res_cm_id_entry+0x1f0/0x1f0 [ib_core] nldev_res_get_qp_dumpit+0x1a/0x20 [ib_core] netlink_dump+0x156/0x2f0 __netlink_dump_start+0x1ab/0x260 rdma_nl_rcv+0x1de/0x330 [ib_core] ? nldev_res_get_cm_id_dumpit+0x20/0x20 [ib_core] netlink_unicast+0x1b8/0x270 netlink_sendmsg+0x33e/0x470 sock_sendmsg+0x63/0x70 __sys_sendto+0x13f/0x180 ? setup_sgl.isra.12+0x70/0xc0 __x64_sys_sendto+0x28/0x30 do_syscall_64+0x3a/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xae Cc: stable@vger.kernel.org Fixes: cecbcddf6461 ("qedr: Add support for QP verbs") Link: https://lore.kernel.org/r/20211027184329.18454-1-palok@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Shai Malin Signed-off-by: Prabhakar Kushwaha Signed-off-by: Alok Prasad Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/verbs.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index dcb3653db72d..3d4e4a766574 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2744,15 +2744,18 @@ int qedr_query_qp(struct ib_qp *ibqp, int rc = 0; memset(¶ms, 0, sizeof(params)); - - rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, ¶ms); - if (rc) - goto err; - memset(qp_attr, 0, sizeof(*qp_attr)); memset(qp_init_attr, 0, sizeof(*qp_init_attr)); - qp_attr->qp_state = qedr_get_ibqp_state(params.state); + if (qp->qp_type != IB_QPT_GSI) { + rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, ¶ms); + if (rc) + goto err; + qp_attr->qp_state = qedr_get_ibqp_state(params.state); + } else { + qp_attr->qp_state = qedr_get_ibqp_state(QED_ROCE_QP_STATE_RTS); + } + qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state); qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu); qp_attr->path_mig_state = IB_MIG_MIGRATED; -- cgit From 04567caf96e5e1150ecd6a3b3301f5aafe015ec0 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 27 Oct 2021 23:54:48 +0300 Subject: RDMA/bnxt_re: Fix kernel panic when trying to access bnxt_re_stat_descs For some reason when introducing the fixed commit the "active_pds" and "active_ahs" descriptors got dropped, which lead to the following panic when trying to access the first entry in the descriptors. bnxt_re: Broadcom NetXtreme-C/E RoCE Driver BUG: kernel NULL pointer dereference, address: 0000000000000000 CPU: 2 PID: 594 Comm: kworker/u32:1 Not tainted 5.15.0-rc6+ #2 Hardware name: Dell Inc. PowerEdge R430/0CN7X8, BIOS 2.12.1 12/07/2020 Workqueue: bnxt_re bnxt_re_task [bnxt_re] RIP: 0010:strlen+0x0/0x20 Code: 48 89 f9 74 09 48 83 c1 01 80 39 00 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 48 83 c2 01 45 84 c0 75 ee c3 0f 1f 80 00 00 00 00 <80> 3f 00 74 10 48 89 f8 48 83 c0 01 80 31 RSP: 0018:ffffb25fc47dfbb0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000008100 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 00000000fffffff4 R09: 0000000000000000 R10: ffff8a05c71fc028 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8a05c3dee800 FS: 0000000000000000(0000) GS:ffff8a092fc40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000048d3da001 CR4: 00000000001706e0 Call Trace: kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xd0 kernfs_remove_by_name_ns+0x32/0x90 remove_files+0x2b/0x60 create_files+0x1d3/0x1f0 internal_create_group+0x17b/0x1f0 internal_create_groups.part.0+0x3d/0xa0 setup_port+0x180/0x3b0 [ib_core] ? __cond_resched+0x16/0x40 ? kmem_cache_alloc_trace+0x278/0x3d0 ib_setup_port_attrs+0x99/0x240 [ib_core] ib_register_device+0xcc/0x160 [ib_core] bnxt_re_task+0xba/0x170 [bnxt_re] process_one_work+0x1eb/0x390 worker_thread+0x53/0x3d0 ? process_one_work+0x390/0x390 kthread+0x10f/0x130 ? set_kthread_struct+0x40/0x40 ret_from_fork+0x22/0x30 Fixes: 13f30b0fa0a9 ("RDMA/counter: Add a descriptor in struct rdma_hw_stats") Link: https://lore.kernel.org/r/20211027205448.127821-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Acked-by: Selvin Xavier Reviewed-by: Leon Romanovsky Reviewed-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/hw_counters.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 78ca6dfd182b..825d512799d9 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -58,6 +58,8 @@ #include "hw_counters.h" static const struct rdma_stat_desc bnxt_re_stat_descs[] = { + [BNXT_RE_ACTIVE_PD].name = "active_pds", + [BNXT_RE_ACTIVE_AH].name = "active_ahs", [BNXT_RE_ACTIVE_QP].name = "active_qps", [BNXT_RE_ACTIVE_SRQ].name = "active_srqs", [BNXT_RE_ACTIVE_CQ].name = "active_cqs", -- cgit From 493620b1c9032b7149f2c345869de8539f4c80b5 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 28 Oct 2021 12:43:59 +0300 Subject: RDMA/bnxt_re: Use helper function to set GUIDs Use addrconf_addr_eui48() helper function to set the GUIDs and remove the driver specific version. Link: https://lore.kernel.org/r/20211028094359.160407-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 5 +++-- drivers/infiniband/hw/bnxt_re/main.c | 2 +- drivers/infiniband/hw/bnxt_re/qplib_res.c | 17 ----------------- drivers/infiniband/hw/bnxt_re/qplib_res.h | 1 - 4 files changed, 4 insertions(+), 21 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 67ca794f64e8..d5f347f8d088 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -130,8 +131,8 @@ int bnxt_re_query_device(struct ib_device *ibdev, memcpy(&ib_attr->fw_ver, dev_attr->fw_ver, min(sizeof(dev_attr->fw_ver), sizeof(ib_attr->fw_ver))); - bnxt_qplib_get_guid(rdev->netdev->dev_addr, - (u8 *)&ib_attr->sys_image_guid); + addrconf_addr_eui48((u8 *)&ib_attr->sys_image_guid, + rdev->netdev->dev_addr); ib_attr->max_mr_size = BNXT_RE_MAX_MR_SIZE; ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_SUPPORTED; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 4fa3b14b2613..83c0748e092d 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -730,7 +730,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) strlen(BNXT_RE_DESC) + 5); ibdev->phys_port_cnt = 1; - bnxt_qplib_get_guid(rdev->netdev->dev_addr, (u8 *)&ibdev->node_guid); + addrconf_addr_eui48((u8 *)&ibdev->node_guid, rdev->netdev->dev_addr); ibdev->num_comp_vectors = rdev->num_msix - 1; ibdev->dev.parent = &rdev->en_dev->pdev->dev; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index a1d8a87dc678..bc1ba4b51ba4 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -572,23 +572,6 @@ fail: return rc; } -/* GUID */ -void bnxt_qplib_get_guid(const u8 *dev_addr, u8 *guid) -{ - u8 mac[ETH_ALEN]; - - /* MAC-48 to EUI-64 mapping */ - memcpy(mac, dev_addr, ETH_ALEN); - guid[0] = mac[0] ^ 2; - guid[1] = mac[1]; - guid[2] = mac[2]; - guid[3] = 0xff; - guid[4] = 0xfe; - guid[5] = mac[3]; - guid[6] = mac[4]; - guid[7] = mac[5]; -} - static void bnxt_qplib_free_sgid_tbl(struct bnxt_qplib_res *res, struct bnxt_qplib_sgid_tbl *sgid_tbl) { diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index d2951a713cc8..e1411a2352a7 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -346,7 +346,6 @@ void bnxt_qplib_free_hwq(struct bnxt_qplib_res *res, struct bnxt_qplib_hwq *hwq); int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, struct bnxt_qplib_hwq_attr *hwq_attr); -void bnxt_qplib_get_guid(const u8 *dev_addr, u8 *guid); int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pd_tbl, struct bnxt_qplib_pd *pd); int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res, -- cgit From ddf65f28ddca002e01007f335b57f7ecd5109b92 Mon Sep 17 00:00:00 2001 From: Scott Breyer Date: Thu, 28 Oct 2021 08:46:01 -0400 Subject: IB/hfi1: Rebranding of hfi1 driver to Cornelis Networks Changes instances of Intel to Cornelis in identifying strings Link: https://lore.kernel.org/r/20211028124601.26694.35662.stgit@awfm-01.cornelisnetworks.com Signed-off-by: Scott Breyer Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/Kconfig | 4 ++-- drivers/infiniband/hw/hfi1/chip.c | 3 ++- drivers/infiniband/hw/hfi1/driver.c | 3 ++- drivers/infiniband/hw/hfi1/init.c | 3 ++- 4 files changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig index 519866b30a13..6eb739052121 100644 --- a/drivers/infiniband/hw/hfi1/Kconfig +++ b/drivers/infiniband/hw/hfi1/Kconfig @@ -1,12 +1,12 @@ # SPDX-License-Identifier: GPL-2.0-only config INFINIBAND_HFI1 - tristate "Intel OPA Gen1 support" + tristate "Cornelis OPX Gen1 support" depends on X86_64 && INFINIBAND_RDMAVT && I2C select MMU_NOTIFIER select CRC32 select I2C_ALGOBIT help - This is a low-level driver for Intel OPA Gen1 adapter. + This is a low-level driver for Cornelis OPX Gen1 adapter. config HFI1_DEBUG_SDMA_ORDER bool "HFI1 SDMA Order debug" depends on INFINIBAND_HFI1 diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 37273dc0c03c..ec37f4fd8e96 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* * Copyright(c) 2015 - 2020 Intel Corporation. + * Copyright(c) 2021 Cornelis Networks. */ /* @@ -14918,7 +14919,7 @@ static int obtain_boardname(struct hfi1_devdata *dd) { /* generic board description */ const char generic[] = - "Intel Omni-Path Host Fabric Interface Adapter 100 Series"; + "Cornelis Omni-Path Host Fabric Interface Adapter 100 Series"; unsigned long size; int ret; diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index de411884386b..61f341c3005c 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* * Copyright(c) 2015-2020 Intel Corporation. + * Copyright(c) 2021 Cornelis Networks. */ #include @@ -56,7 +57,7 @@ module_param_cb(cap_mask, &cap_ops, &hfi1_cap_mask, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(cap_mask, "Bit mask of enabled/disabled HW features"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("Intel Omni-Path Architecture driver"); +MODULE_DESCRIPTION("Cornelis Omni-Path Express driver"); /* * MAX_PKT_RCV is the max # if packets processed per receive interrupt. diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index e3679d076eaa..dbd1c31830b9 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* * Copyright(c) 2015 - 2020 Intel Corporation. + * Copyright(c) 2021 Cornelis Networks. */ #include @@ -1342,7 +1343,7 @@ static void remove_one(struct pci_dev *); static int init_one(struct pci_dev *, const struct pci_device_id *); static void shutdown_one(struct pci_dev *); -#define DRIVER_LOAD_MSG "Intel " DRIVER_NAME " loaded: " +#define DRIVER_LOAD_MSG "Cornelis " DRIVER_NAME " loaded: " #define PFX DRIVER_NAME ": " const struct pci_device_id hfi1_pci_tbl[] = { -- cgit From 840f4ed2d47b525c0bccc10e4aeaf7a4beeffc67 Mon Sep 17 00:00:00 2001 From: Scott Breyer Date: Thu, 28 Oct 2021 08:46:06 -0400 Subject: IB/qib: Rebranding of qib driver to Cornelis Networks Changes instances of Intel to Cornelis in identifying strings Link: https://lore.kernel.org/r/20211028124606.26694.71567.stgit@awfm-01.cornelisnetworks.com Signed-off-by: Scott Breyer Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_driver.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 84fc4dcc5399..bf3fa12fe935 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2021 Cornelis Networks. All rights reserved. * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. @@ -62,8 +63,8 @@ MODULE_PARM_DESC(compat_ddr_negotiate, "Attempt pre-IBTA 1.2 DDR speed negotiation"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Intel "); -MODULE_DESCRIPTION("Intel IB driver"); +MODULE_AUTHOR("Cornelis "); +MODULE_DESCRIPTION("Cornelis IB driver"); /* * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our -- cgit From 4892298c3a33d965cd0d34ffd66b79b45abdf9b7 Mon Sep 17 00:00:00 2001 From: Scott Breyer Date: Thu, 28 Oct 2021 08:46:11 -0400 Subject: IB/opa_vnic: Rebranding of OPA VNIC driver to Cornelis Networks Changes instances of Intel to Cornelis in identifying strings Link: https://lore.kernel.org/r/20211028124611.26694.71239.stgit@awfm-01.cornelisnetworks.com Signed-off-by: Scott Breyer Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/opa_vnic/Kconfig | 4 ++-- drivers/infiniband/ulp/opa_vnic/Makefile | 3 ++- drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 7 ++++--- 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/opa_vnic/Kconfig b/drivers/infiniband/ulp/opa_vnic/Kconfig index e84248587187..4d43d055fa8e 100644 --- a/drivers/infiniband/ulp/opa_vnic/Kconfig +++ b/drivers/infiniband/ulp/opa_vnic/Kconfig @@ -1,9 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only config INFINIBAND_OPA_VNIC - tristate "Intel OPA VNIC support" + tristate "Cornelis OPX VNIC support" depends on X86_64 && INFINIBAND help - This is Omni-Path (OPA) Virtual Network Interface Controller (VNIC) + This is Omni-Path Express (OPX) Virtual Network Interface Controller (VNIC) driver for Ethernet over Omni-Path feature. It implements the HW independent VNIC functionality. It interfaces with Linux stack for data path and IB MAD for the control path. diff --git a/drivers/infiniband/ulp/opa_vnic/Makefile b/drivers/infiniband/ulp/opa_vnic/Makefile index a8c21d140ccb..196183817cdc 100644 --- a/drivers/infiniband/ulp/opa_vnic/Makefile +++ b/drivers/infiniband/ulp/opa_vnic/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only -# Makefile - Intel Omni-Path Virtual Network Controller driver +# Makefile - Cornelis Omni-Path Express Virtual Network Controller driver # Copyright(c) 2017, Intel Corporation. +# Copyright(c) 2021, Cornelis Networks. # obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index cecf0f7cadf9..21c6cea8b1db 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -1,5 +1,6 @@ /* * Copyright(c) 2017 Intel Corporation. + * Copyright(c) 2021 Cornelis Networks. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -46,7 +47,7 @@ */ /* - * This file contains OPA Virtual Network Interface Controller (VNIC) + * This file contains OPX Virtual Network Interface Controller (VNIC) * Ethernet Management Agent (EMA) driver */ @@ -1051,5 +1052,5 @@ static void opa_vnic_deinit(void) module_exit(opa_vnic_deinit); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Intel Corporation"); -MODULE_DESCRIPTION("Intel OPA Virtual Network driver"); +MODULE_AUTHOR("Cornelis Networks"); +MODULE_DESCRIPTION("Cornelis OPX Virtual Network driver"); -- cgit From 6d202d9f70a33560ab62b81da2b062c936437e54 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Thu, 28 Oct 2021 18:56:40 +0800 Subject: RDMA/hns: Use the core code to manage the fixed mmap entries Add a new implementation for mmap by using the new mmap entry API. This makes way for further use of the dynamic mmap allocator in this driver. Link: https://lore.kernel.org/r/20211028105640.1056-1-liangwenpeng@huawei.com Signed-off-by: Chengchang Tang Signed-off-by: Yixing Liu Signed-off-by: Wenpeng Liang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 23 +++++ drivers/infiniband/hw/hns/hns_roce_main.c | 139 ++++++++++++++++++++++------ 2 files changed, 135 insertions(+), 27 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index e5dadcd118ac..43e17d61cb63 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -225,11 +225,24 @@ struct hns_roce_uar { unsigned long logic_idx; }; +enum hns_roce_mmap_type { + HNS_ROCE_MMAP_TYPE_DB = 1, + HNS_ROCE_MMAP_TYPE_TPTR, +}; + +struct hns_user_mmap_entry { + struct rdma_user_mmap_entry rdma_entry; + enum hns_roce_mmap_type mmap_type; + u64 address; +}; + struct hns_roce_ucontext { struct ib_ucontext ibucontext; struct hns_roce_uar uar; struct list_head page_list; struct mutex page_mutex; + struct hns_user_mmap_entry *db_mmap_entry; + struct hns_user_mmap_entry *tptr_mmap_entry; }; struct hns_roce_pd { @@ -1050,6 +1063,12 @@ static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq) return container_of(ibsrq, struct hns_roce_srq, ibsrq); } +static inline struct hns_user_mmap_entry * +to_hns_mmap(struct rdma_user_mmap_entry *rdma_entry) +{ + return container_of(rdma_entry, struct hns_user_mmap_entry, rdma_entry); +} + static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest) { writeq(*(u64 *)val, dest); @@ -1260,4 +1279,8 @@ int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq); +struct hns_user_mmap_entry * +hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, + size_t length, + enum hns_roce_mmap_type mmap_type); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index b3595b6079b5..4194b626f3c6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -292,6 +292,79 @@ static int hns_roce_modify_device(struct ib_device *ib_dev, int mask, return 0; } +struct hns_user_mmap_entry * +hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, + size_t length, + enum hns_roce_mmap_type mmap_type) +{ + struct hns_user_mmap_entry *entry; + int ret; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return NULL; + + entry->address = address; + entry->mmap_type = mmap_type; + + ret = rdma_user_mmap_entry_insert_exact( + ucontext, &entry->rdma_entry, length, + mmap_type == HNS_ROCE_MMAP_TYPE_DB ? 0 : 1); + if (ret) { + kfree(entry); + return NULL; + } + + return entry; +} + +static void hns_roce_dealloc_uar_entry(struct hns_roce_ucontext *context) +{ + if (context->db_mmap_entry) + rdma_user_mmap_entry_remove( + &context->db_mmap_entry->rdma_entry); + + if (context->tptr_mmap_entry) + rdma_user_mmap_entry_remove( + &context->tptr_mmap_entry->rdma_entry); +} + +static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx) +{ + struct hns_roce_ucontext *context = to_hr_ucontext(uctx); + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device); + u64 address; + int ret; + + address = context->uar.pfn << PAGE_SHIFT; + context->db_mmap_entry = hns_roce_user_mmap_entry_insert( + uctx, address, PAGE_SIZE, HNS_ROCE_MMAP_TYPE_DB); + if (!context->db_mmap_entry) + return -ENOMEM; + + if (!hr_dev->tptr_dma_addr || !hr_dev->tptr_size) + return 0; + + /* + * FIXME: using io_remap_pfn_range on the dma address returned + * by dma_alloc_coherent is totally wrong. + */ + context->tptr_mmap_entry = + hns_roce_user_mmap_entry_insert(uctx, hr_dev->tptr_dma_addr, + hr_dev->tptr_size, + HNS_ROCE_MMAP_TYPE_TPTR); + if (!context->tptr_mmap_entry) { + ret = -ENOMEM; + goto err; + } + + return 0; + +err: + hns_roce_dealloc_uar_entry(context); + return ret; +} + static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { @@ -310,6 +383,10 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, if (ret) goto error_fail_uar_alloc; + ret = hns_roce_alloc_uar_entry(uctx); + if (ret) + goto error_fail_uar_entry; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) { INIT_LIST_HEAD(&context->page_list); @@ -326,6 +403,9 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, return 0; error_fail_copy_to_udata: + hns_roce_dealloc_uar_entry(context); + +error_fail_uar_entry: ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx); error_fail_uar_alloc: @@ -337,39 +417,43 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext); struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device); + hns_roce_dealloc_uar_entry(context); + ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx); } -static int hns_roce_mmap(struct ib_ucontext *context, - struct vm_area_struct *vma) +static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma) { - struct hns_roce_dev *hr_dev = to_hr_dev(context->device); - - switch (vma->vm_pgoff) { - case 0: - return rdma_user_mmap_io(context, vma, - to_hr_ucontext(context)->uar.pfn, - PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot), - NULL); - - /* vm_pgoff: 1 -- TPTR */ - case 1: - if (!hr_dev->tptr_dma_addr || !hr_dev->tptr_size) - return -EINVAL; - /* - * FIXME: using io_remap_pfn_range on the dma address returned - * by dma_alloc_coherent is totally wrong. - */ - return rdma_user_mmap_io(context, vma, - hr_dev->tptr_dma_addr >> PAGE_SHIFT, - hr_dev->tptr_size, - vma->vm_page_prot, - NULL); + struct rdma_user_mmap_entry *rdma_entry; + struct hns_user_mmap_entry *entry; + phys_addr_t pfn; + pgprot_t prot; + int ret; - default: + rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff); + if (!rdma_entry) return -EINVAL; - } + + entry = to_hns_mmap(rdma_entry); + pfn = entry->address >> PAGE_SHIFT; + prot = vma->vm_page_prot; + + if (entry->mmap_type != HNS_ROCE_MMAP_TYPE_TPTR) + prot = pgprot_noncached(prot); + + ret = rdma_user_mmap_io(uctx, vma, pfn, rdma_entry->npages * PAGE_SIZE, + prot, rdma_entry); + + rdma_user_mmap_entry_put(rdma_entry); + + return ret; +} + +static void hns_roce_free_mmap(struct rdma_user_mmap_entry *rdma_entry) +{ + struct hns_user_mmap_entry *entry = to_hns_mmap(rdma_entry); + + kfree(entry); } static int hns_roce_port_immutable(struct ib_device *ib_dev, u32 port_num, @@ -445,6 +529,7 @@ static const struct ib_device_ops hns_roce_dev_ops = { .get_link_layer = hns_roce_get_link_layer, .get_port_immutable = hns_roce_port_immutable, .mmap = hns_roce_mmap, + .mmap_free = hns_roce_free_mmap, .modify_device = hns_roce_modify_device, .modify_qp = hns_roce_modify_qp, .query_ah = hns_roce_query_ah, -- cgit From 9ed8110c9b298cf143a8abc6dab975547da72888 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Sat, 30 Oct 2021 06:42:26 -0400 Subject: RDMA/irdma: optimize rx path by removing unnecessary copy In the function irdma_post_recv, the function irdma_copy_sg_list is not needed since the struct irdma_sge and ib_sge have the similar member variables. The struct irdma_sge can be replaced with the struct ib_sge totally. This can increase the rx performance of irdma. Link: https://lore.kernel.org/r/20211030104226.253346-1-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Reviewed-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/uk.c | 38 ++++++++++++++++----------------- drivers/infiniband/hw/irdma/user.h | 23 +++++++------------- drivers/infiniband/hw/irdma/verbs.c | 42 +++++++++++-------------------------- 3 files changed, 39 insertions(+), 64 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index cc578974ad0e..0080bc766b73 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -13,16 +13,16 @@ * @sge: sge length and stag * @valid: The wqe valid */ -static void irdma_set_fragment(__le64 *wqe, u32 offset, struct irdma_sge *sge, +static void irdma_set_fragment(__le64 *wqe, u32 offset, struct ib_sge *sge, u8 valid) { if (sge) { set_64bit_val(wqe, offset, - FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->tag_off)); + FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->addr)); set_64bit_val(wqe, offset + 8, FIELD_PREP(IRDMAQPSQ_VALID, valid) | - FIELD_PREP(IRDMAQPSQ_FRAG_LEN, sge->len) | - FIELD_PREP(IRDMAQPSQ_FRAG_STAG, sge->stag)); + FIELD_PREP(IRDMAQPSQ_FRAG_LEN, sge->length) | + FIELD_PREP(IRDMAQPSQ_FRAG_STAG, sge->lkey)); } else { set_64bit_val(wqe, offset, 0); set_64bit_val(wqe, offset + 8, @@ -38,14 +38,14 @@ static void irdma_set_fragment(__le64 *wqe, u32 offset, struct irdma_sge *sge, * @valid: wqe valid flag */ static void irdma_set_fragment_gen_1(__le64 *wqe, u32 offset, - struct irdma_sge *sge, u8 valid) + struct ib_sge *sge, u8 valid) { if (sge) { set_64bit_val(wqe, offset, - FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->tag_off)); + FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->addr)); set_64bit_val(wqe, offset + 8, - FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, sge->len) | - FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_STAG, sge->stag)); + FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, sge->length) | + FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_STAG, sge->lkey)); } else { set_64bit_val(wqe, offset, 0); set_64bit_val(wqe, offset + 8, 0); @@ -289,7 +289,7 @@ enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp, return IRDMA_ERR_INVALID_FRAG_COUNT; for (i = 0; i < op_info->num_lo_sges; i++) - total_size += op_info->lo_sg_list[i].len; + total_size += op_info->lo_sg_list[i].length; read_fence |= info->read_fence; @@ -310,7 +310,7 @@ enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp, irdma_clr_wqes(qp, wqe_idx); set_64bit_val(wqe, 16, - FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); + FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr)); if (info->imm_data_valid) { set_64bit_val(wqe, 0, @@ -339,7 +339,7 @@ enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp, ++addl_frag_cnt; } - hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.stag) | + hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) | FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt) | @@ -391,7 +391,7 @@ enum irdma_status_code irdma_uk_rdma_read(struct irdma_qp_uk *qp, return IRDMA_ERR_INVALID_FRAG_COUNT; for (i = 0; i < op_info->num_lo_sges; i++) - total_size += op_info->lo_sg_list[i].len; + total_size += op_info->lo_sg_list[i].length; ret_code = irdma_fragcnt_to_quanta_sq(op_info->num_lo_sges, &quanta); if (ret_code) @@ -426,8 +426,8 @@ enum irdma_status_code irdma_uk_rdma_read(struct irdma_qp_uk *qp, ++addl_frag_cnt; } set_64bit_val(wqe, 16, - FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); - hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.stag) | + FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr)); + hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | FIELD_PREP(IRDMAQPSQ_OPCODE, @@ -477,7 +477,7 @@ enum irdma_status_code irdma_uk_send(struct irdma_qp_uk *qp, return IRDMA_ERR_INVALID_FRAG_COUNT; for (i = 0; i < op_info->num_sges; i++) - total_size += op_info->sg_list[i].len; + total_size += op_info->sg_list[i].length; if (info->imm_data_valid) frag_cnt = op_info->num_sges + 1; @@ -705,9 +705,9 @@ irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *in read_fence |= info->read_fence; set_64bit_val(wqe, 16, - FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); + FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr)); - hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.stag) | + hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) | FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, op_info->len) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt ? 1 : 0) | @@ -826,7 +826,7 @@ irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, u64 hdr; u32 wqe_idx; bool local_fence = false; - struct irdma_sge sge = {}; + struct ib_sge sge = {}; info->push_wqe = qp->push_db ? true : false; op_info = &info->op.inv_local_stag; @@ -839,7 +839,7 @@ irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, irdma_clr_wqes(qp, wqe_idx); - sge.stag = op_info->target_stag; + sge.lkey = op_info->target_stag; qp->wqe_ops.iw_set_fragment(wqe, 0, &sge, 0); set_64bit_val(wqe, 16, 0); diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index cba1a2da938d..3c811fb88404 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -16,7 +16,6 @@ #define irdma_access_privileges u32 #define irdma_physical_fragment u64 #define irdma_address_list u64 * -#define irdma_sgl struct irdma_sge * #define IRDMA_MAX_MR_SIZE 0x200000000000ULL @@ -151,12 +150,6 @@ struct irdma_cq_uk; struct irdma_qp_uk_init_info; struct irdma_cq_uk_init_info; -struct irdma_sge { - irdma_tagged_offset tag_off; - u32 len; - irdma_stag stag; -}; - struct irdma_ring { u32 head; u32 tail; @@ -172,7 +165,7 @@ struct irdma_extended_cqe { }; struct irdma_post_send { - irdma_sgl sg_list; + struct ib_sge *sg_list; u32 num_sges; u32 qkey; u32 dest_qp; @@ -189,26 +182,26 @@ struct irdma_post_inline_send { struct irdma_post_rq_info { u64 wr_id; - irdma_sgl sg_list; + struct ib_sge *sg_list; u32 num_sges; }; struct irdma_rdma_write { - irdma_sgl lo_sg_list; + struct ib_sge *lo_sg_list; u32 num_lo_sges; - struct irdma_sge rem_addr; + struct ib_sge rem_addr; }; struct irdma_inline_rdma_write { void *data; u32 len; - struct irdma_sge rem_addr; + struct ib_sge rem_addr; }; struct irdma_rdma_read { - irdma_sgl lo_sg_list; + struct ib_sge *lo_sg_list; u32 num_lo_sges; - struct irdma_sge rem_addr; + struct ib_sge rem_addr; }; struct irdma_bind_window { @@ -304,7 +297,7 @@ enum irdma_status_code irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, struct irdma_wqe_uk_ops { void (*iw_copy_inline_data)(u8 *dest, u8 *src, u32 len, u8 polarity); u16 (*iw_inline_data_size_to_quanta)(u32 data_size); - void (*iw_set_fragment)(__le64 *wqe, u32 offset, struct irdma_sge *sge, + void (*iw_set_fragment)(__le64 *wqe, u32 offset, struct ib_sge *sge, u8 valid); void (*iw_set_mw_bind_wqe)(__le64 *wqe, struct irdma_bind_window *op_info); diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 2c78e34c7cd0..2fce6a45257b 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -3038,24 +3038,6 @@ done: return 0; } -/** - * irdma_copy_sg_list - copy sg list for qp - * @sg_list: copied into sg_list - * @sgl: copy from sgl - * @num_sges: count of sg entries - */ -static void irdma_copy_sg_list(struct irdma_sge *sg_list, struct ib_sge *sgl, - int num_sges) -{ - unsigned int i; - - for (i = 0; (i < num_sges) && (i < IRDMA_MAX_WQ_FRAGMENT_COUNT); i++) { - sg_list[i].tag_off = sgl[i].addr; - sg_list[i].len = sgl[i].length; - sg_list[i].stag = sgl[i].lkey; - } -} - /** * irdma_post_send - kernel application wr * @ibqp: qp ptr for wr @@ -3132,8 +3114,7 @@ static int irdma_post_send(struct ib_qp *ibqp, ret = irdma_uk_inline_send(ukqp, &info, false); } else { info.op.send.num_sges = ib_wr->num_sge; - info.op.send.sg_list = (struct irdma_sge *) - ib_wr->sg_list; + info.op.send.sg_list = ib_wr->sg_list; if (iwqp->ibqp.qp_type == IB_QPT_UD || iwqp->ibqp.qp_type == IB_QPT_GSI) { ah = to_iwah(ud_wr(ib_wr)->ah); @@ -3168,15 +3149,18 @@ static int irdma_post_send(struct ib_qp *ibqp, if (ib_wr->send_flags & IB_SEND_INLINE) { info.op.inline_rdma_write.data = (void *)(uintptr_t)ib_wr->sg_list[0].addr; - info.op.inline_rdma_write.len = ib_wr->sg_list[0].length; - info.op.inline_rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; - info.op.inline_rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey; + info.op.inline_rdma_write.len = + ib_wr->sg_list[0].length; + info.op.inline_rdma_write.rem_addr.addr = + rdma_wr(ib_wr)->remote_addr; + info.op.inline_rdma_write.rem_addr.lkey = + rdma_wr(ib_wr)->rkey; ret = irdma_uk_inline_rdma_write(ukqp, &info, false); } else { info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list; info.op.rdma_write.num_lo_sges = ib_wr->num_sge; - info.op.rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; - info.op.rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey; + info.op.rdma_write.rem_addr.addr = rdma_wr(ib_wr)->remote_addr; + info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey; ret = irdma_uk_rdma_write(ukqp, &info, false); } @@ -3197,8 +3181,8 @@ static int irdma_post_send(struct ib_qp *ibqp, break; } info.op_type = IRDMA_OP_TYPE_RDMA_READ; - info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; - info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey; + info.op.rdma_read.rem_addr.addr = rdma_wr(ib_wr)->remote_addr; + info.op.rdma_read.rem_addr.lkey = rdma_wr(ib_wr)->rkey; info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list; info.op.rdma_read.num_lo_sges = ib_wr->num_sge; @@ -3285,7 +3269,6 @@ static int irdma_post_recv(struct ib_qp *ibqp, struct irdma_qp *iwqp; struct irdma_qp_uk *ukqp; struct irdma_post_rq_info post_recv = {}; - struct irdma_sge sg_list[IRDMA_MAX_WQ_FRAGMENT_COUNT]; enum irdma_status_code ret = 0; unsigned long flags; int err = 0; @@ -3300,8 +3283,7 @@ static int irdma_post_recv(struct ib_qp *ibqp, while (ib_wr) { post_recv.num_sges = ib_wr->num_sge; post_recv.wr_id = ib_wr->wr_id; - irdma_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge); - post_recv.sg_list = sg_list; + post_recv.sg_list = ib_wr->sg_list; ret = irdma_uk_post_receive(ukqp, &post_recv); if (ret) { ibdev_dbg(&iwqp->iwdev->ibdev, -- cgit From dd83f482d2cd5aa6a515a343b822c12b66661c2e Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 2 Nov 2021 09:30:54 +0200 Subject: RDMA/bnxt_re: Remove unsupported bnxt_re_modify_ah callback There is no need to return always zero for function which is not supported, especially since 0 is the wrong return code. Link: https://lore.kernel.org/r/20211102073054.410838-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 5 ----- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 1 - drivers/infiniband/hw/bnxt_re/main.c | 1 - 3 files changed, 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index d5f347f8d088..29cc0d14399a 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -708,11 +708,6 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr, return 0; } -int bnxt_re_modify_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr) -{ - return 0; -} - int bnxt_re_query_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr) { struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index b5c6e0f4f877..94326267f9bb 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -166,7 +166,6 @@ int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); int bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); -int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags); int bnxt_re_create_srq(struct ib_srq *srq, diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 83c0748e092d..b44944fb9b24 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -696,7 +696,6 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .get_port_immutable = bnxt_re_get_port_immutable, .map_mr_sg = bnxt_re_map_mr_sg, .mmap = bnxt_re_mmap, - .modify_ah = bnxt_re_modify_ah, .modify_qp = bnxt_re_modify_qp, .modify_srq = bnxt_re_modify_srq, .poll_cq = bnxt_re_poll_cq, -- cgit From f1a090f09f42be5a5542009f0be310fdb3e768fc Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Thu, 28 Oct 2021 08:55:22 +0300 Subject: RDMA/core: Require the driver to set the IOVA correctly during rereg_mr If the driver returns a new MR during rereg it has to fill it with the IOVA from the proper source. If IB_MR_REREG_TRANS is set then the IOVA is cmd.hca_va, otherwise the IOVA comes from the old MR. mlx5 for example has two calls inside rereg_mr: return create_real_mr(new_pd, umem, mr->ibmr.iova, new_access_flags); and return create_real_mr(new_pd, new_umem, iova, new_access_flags); Unconditionally overwriting the iova in the newly allocated MR will corrupt the iova if the first path is used. Remove the redundant initializations from ib_uverbs_rereg_mr(). Fixes: 6e0954b11c05 ("RDMA/uverbs: Allow drivers to create a new HW object during rereg_mr") Link: https://lore.kernel.org/r/4b0a31bbc372842613286a10d7a8cbb0ee6069c7.1635400472.git.leonro@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 740e6b2efe0e..d1345d76d9b1 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -837,11 +837,8 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) new_mr->device = new_pd->device; new_mr->pd = new_pd; new_mr->type = IB_MR_TYPE_USER; - new_mr->dm = NULL; - new_mr->sig_attrs = NULL; new_mr->uobject = uobj; atomic_inc(&new_pd->usecnt); - new_mr->iova = cmd.hca_va; new_uobj->object = new_mr; rdma_restrack_new(&new_mr->res, RDMA_RESTRACK_MR); -- cgit