diff options
author | Michal Kalderon <michal.kalderon@marvell.com> | 2019-10-27 22:04:50 +0200 |
---|---|---|
committer | Jason Gunthorpe <jgg@mellanox.com> | 2019-10-28 14:01:35 -0300 |
commit | 82af6d19d8d9227c22a53ff00b40fb2a4f9fce69 (patch) | |
tree | 35750c8f4b4ed206967b9f19edc51a71e4172226 /drivers/infiniband/hw/qedr/verbs.c | |
parent | 5fdff18b4dc64e2d1e912ad2b90495cd487f791b (diff) |
RDMA/qedr: Fix synchronization methods and memory leaks in qedr
Re-design of the iWARP CM related objects reference counting and
synchronization methods, to ensure operations are synchronized correctly
and that memory allocated for "ep" is properly released. Also makes sure
QP memory is not released before ep is finished accessing it.
Where as the QP object is created/destroyed by external operations, the ep
is created/destroyed by internal operations and represents the tcp
connection associated with the QP.
QP destruction flow:
- needs to wait for ep establishment to complete (either successfully or
with error)
- needs to wait for ep disconnect to be fully posted to avoid a race
condition of disconnect being called after reset.
- both the operations above don't always happen, so we use atomic flags to
indicate whether the qp destruction flow needs to wait for these
completions or not, if the destroy is called before these operations
began, the flows will check the flags and not execute them ( connect /
disconnect).
We use completion structure for waiting for the completions mentioned
above.
The QP refcnt was modified to kref object. The EP has a kref added to it
to handle additional worker thread accessing it.
Memory Leaks - https://www.spinics.net/lists/linux-rdma/msg83762.html
Concurrency not managed correctly -
https://www.spinics.net/lists/linux-rdma/msg67949.html
Fixes: de0089e692a9 ("RDMA/qedr: Add iWARP connection management qp related callbacks")
Link: https://lore.kernel.org/r/20191027200451.28187-4-michal.kalderon@marvell.com
Reported-by: Chuck Lever <chuck.lever@oracle.com>
Reported-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'drivers/infiniband/hw/qedr/verbs.c')
-rw-r--r-- | drivers/infiniband/hw/qedr/verbs.c | 62 |
1 files changed, 39 insertions, 23 deletions
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 84b666c67cff..a17b388ee3b3 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -51,6 +51,7 @@ #include "verbs.h" #include <rdma/qedr-abi.h> #include "qedr_roce_cm.h" +#include "qedr_iw_cm.h" #define QEDR_SRQ_WQE_ELEM_SIZE sizeof(union rdma_srq_elm) #define RDMA_MAX_SGE_PER_SRQ (4) @@ -1193,7 +1194,10 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, struct ib_qp_init_attr *attrs) { spin_lock_init(&qp->q_lock); - atomic_set(&qp->refcnt, 1); + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + kref_init(&qp->refcnt); + init_completion(&qp->iwarp_cm_comp); + } qp->pd = pd; qp->qp_type = attrs->qp_type; qp->max_inline_data = attrs->cap.max_inline_data; @@ -1592,6 +1596,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev, int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); int rc = -EINVAL; + qp->create_type = QEDR_QP_CREATE_USER; memset(&ureq, 0, sizeof(ureq)); rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq)); if (rc) { @@ -1805,6 +1810,7 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev, u32 n_sq_entries; memset(&in_params, 0, sizeof(in_params)); + qp->create_type = QEDR_QP_CREATE_KERNEL; /* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in * the ring. The ring should allow at least a single WR, even if the @@ -2437,7 +2443,7 @@ static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, return rc; } - if (udata) + if (qp->create_type == QEDR_QP_CREATE_USER) qedr_cleanup_user(dev, qp); else qedr_cleanup_kernel(dev, qp); @@ -2467,34 +2473,44 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) qedr_modify_qp(ibqp, &attr, attr_mask, NULL); } } else { - /* Wait for the connect/accept to complete */ - if (qp->ep) { - int wait_count = 1; - - while (qp->ep->during_connect) { - DP_DEBUG(dev, QEDR_MSG_QP, - "Still in during connect/accept\n"); - - msleep(100); - if (wait_count++ > 200) { - DP_NOTICE(dev, - "during connect timeout\n"); - break; - } - } - } + /* If connection establishment started the WAIT_FOR_CONNECT + * bit will be on and we need to Wait for the establishment + * to complete before destroying the qp. + */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, + &qp->iwarp_cm_flags)) + wait_for_completion(&qp->iwarp_cm_comp); + + /* If graceful disconnect started, the WAIT_FOR_DISCONNECT + * bit will be on, and we need to wait for the disconnect to + * complete before continuing. We can use the same completion, + * iwarp_cm_comp, since this is the only place that waits for + * this completion and it is sequential. In addition, + * disconnect can't occur before the connection is fully + * established, therefore if WAIT_FOR_DISCONNECT is on it + * means WAIT_FOR_CONNECT is also on and the completion for + * CONNECT already occurred. + */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT, + &qp->iwarp_cm_flags)) + wait_for_completion(&qp->iwarp_cm_comp); } if (qp->qp_type == IB_QPT_GSI) qedr_destroy_gsi_qp(dev); + /* We need to remove the entry from the xarray before we release the + * qp_id to avoid a race of the qp_id being reallocated and failing + * on xa_insert + */ + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + xa_erase(&dev->qps, qp->qp_id); + qedr_free_qp_resources(dev, qp, udata); - if (atomic_dec_and_test(&qp->refcnt) && - rdma_protocol_iwarp(&dev->ibdev, 1)) { - xa_erase(&dev->qps, qp->qp_id); - kfree(qp); - } + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + qedr_iw_qp_rem_ref(&qp->ibqp); + return 0; } |