author | Bob Pearson <rpearsonhpe@gmail.com> | 2023-04-04 23:26:11 -0500
---|---|---
committer | Jason Gunthorpe <jgg@nvidia.com> | 2023-04-17 16:34:04 -0300
commit | f605f26ea196a3b49bea249330cbd18dba61a33e (patch)
tree | fc7bb3a98ec7549d60e7be5ae6db058ad219bb4a /drivers/infiniband/sw/rxe/rxe_req.c
parent | 7b560b89a08d35c23dfc95dc44aee10651c8b9a0 (diff)
RDMA/rxe: Protect QP state with qp->state_lock
Currently the rxe driver makes little effort to make the changes to qp
state (which includes qp->attr.qp_state, qp->attr.sq_draining and
qp->valid) atomic between different client threads and IO threads. In
particular, a common pattern is for an RDMA application to call
ib_modify_qp() to move a qp to ERR state and then wait until all the
packet and work queues have drained before calling ib_destroy_qp(). None
of these state changes are protected by locks to ensure that the changes
are executed atomically and that memory barriers are included. This has
been observed to lead to incorrect behavior around qp cleanup.
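For illustration, that template looks roughly like the following in a userspace verbs application (the libibverbs calls map onto the ib_modify_qp()/ib_destroy_qp() kernel paths mentioned above; the helper name and the caller-tracked outstanding-WR count are assumptions made for the sketch, not part of this patch):

```c
#include <infiniband/verbs.h>

/* Illustrative sketch only: move the QP to ERR, reap the flush
 * completions for any work requests still posted, then destroy the
 * QP.  The 'outstanding' count is assumed to be tracked by the caller.
 */
static int drain_and_destroy_qp(struct ibv_qp *qp, struct ibv_cq *cq,
				int outstanding)
{
	struct ibv_qp_attr attr = { .qp_state = IBV_QPS_ERR };
	struct ibv_wc wc;
	int ret;

	ret = ibv_modify_qp(qp, &attr, IBV_QP_STATE);
	if (ret)
		return ret;

	/* wait for the send and receive queues to drain: each posted WR
	 * completes, typically with IBV_WC_WR_FLUSH_ERR status
	 */
	while (outstanding > 0) {
		ret = ibv_poll_cq(cq, 1, &wc);
		if (ret < 0)
			return ret;
		outstanding -= ret;
	}

	return ibv_destroy_qp(qp);
}
```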
This patch continues the work of the previous patches in this series and
adds locking code around qp state changes and lookups.
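Condensed from the diff below, the locking pattern is simply to take qp->state_lock around every read or write of qp->valid and the qp state; the helper here is a hypothetical example of that pattern, not code added by this patch:

```c
/* Hypothetical helper showing the pattern the diff below applies:
 * qp->valid and qp_state(qp) are only examined while holding
 * qp->state_lock, so ib_modify_qp() and the requester/completer
 * tasks observe a consistent qp state.
 */
static bool rxe_qp_is_usable(struct rxe_qp *qp)
{
	bool usable;

	spin_lock_bh(&qp->state_lock);
	usable = qp->valid && qp_state(qp) != IB_QPS_ERR;
	spin_unlock_bh(&qp->state_lock);

	return usable;
}
```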
Link: https://lore.kernel.org/r/20230405042611.6467-5-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'drivers/infiniband/sw/rxe/rxe_req.c')
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_req.c | 71
1 file changed, 47 insertions, 24 deletions
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index f329038efbc8..8e50d116d273 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -102,24 +102,33 @@ void rnr_nak_timer(struct timer_list *t)
 
 	rxe_dbg_qp(qp, "nak timer fired\n");
 
-	/* request a send queue retry */
-	qp->req.need_retry = 1;
-	qp->req.wait_for_rnr_timer = 0;
-	rxe_sched_task(&qp->req.task);
+	spin_lock_bh(&qp->state_lock);
+	if (qp->valid) {
+		/* request a send queue retry */
+		qp->req.need_retry = 1;
+		qp->req.wait_for_rnr_timer = 0;
+		rxe_sched_task(&qp->req.task);
+	}
+	spin_unlock_bh(&qp->state_lock);
 }
 
 static void req_check_sq_drain_done(struct rxe_qp *qp)
 {
-	struct rxe_queue *q = qp->sq.queue;
-	unsigned int index = qp->req.wqe_index;
-	unsigned int cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT);
-	struct rxe_send_wqe *wqe = queue_addr_from_index(q, cons);
+	struct rxe_queue *q;
+	unsigned int index;
+	unsigned int cons;
+	struct rxe_send_wqe *wqe;
+
+	spin_lock_bh(&qp->state_lock);
+	if (qp_state(qp) == IB_QPS_SQD) {
+		q = qp->sq.queue;
+		index = qp->req.wqe_index;
+		cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT);
+		wqe = queue_addr_from_index(q, cons);
 
-	if (unlikely(qp_state(qp) == IB_QPS_SQD)) {
 		/* check to see if we are drained;
 		 * state_lock used by requester and completer
 		 */
-		spin_lock_bh(&qp->state_lock);
 		do {
 			if (!qp->attr.sq_draining)
 				/* comp just finished */
@@ -144,28 +153,40 @@ static void req_check_sq_drain_done(struct rxe_qp *qp)
 			}
 			return;
 		} while (0);
-		spin_unlock_bh(&qp->state_lock);
 	}
+	spin_unlock_bh(&qp->state_lock);
 }
 
-static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
+static struct rxe_send_wqe *__req_next_wqe(struct rxe_qp *qp)
 {
-	struct rxe_send_wqe *wqe;
 	struct rxe_queue *q = qp->sq.queue;
 	unsigned int index = qp->req.wqe_index;
 	unsigned int prod;
 
-	req_check_sq_drain_done(qp);
-
 	prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT);
 	if (index == prod)
 		return NULL;
+	else
+		return queue_addr_from_index(q, index);
+}
 
-	wqe = queue_addr_from_index(q, index);
+static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
+{
+	struct rxe_send_wqe *wqe;
+
+	req_check_sq_drain_done(qp);
+
+	wqe = __req_next_wqe(qp);
+	if (wqe == NULL)
+		return NULL;
 
+	spin_lock(&qp->state_lock);
 	if (unlikely((qp_state(qp) == IB_QPS_SQD) &&
-		     (wqe->state != wqe_state_processing)))
+		     (wqe->state != wqe_state_processing))) {
+		spin_unlock(&qp->state_lock);
 		return NULL;
+	}
+	spin_unlock(&qp->state_lock);
 
 	wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
 	return wqe;
@@ -656,15 +677,16 @@ int rxe_requester(struct rxe_qp *qp)
 	struct rxe_ah *ah;
 	struct rxe_av *av;
 
-	if (unlikely(!qp->valid))
+	spin_lock_bh(&qp->state_lock);
+	if (unlikely(!qp->valid)) {
+		spin_unlock_bh(&qp->state_lock);
 		goto exit;
+	}
 
 	if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
-		wqe = req_next_wqe(qp);
+		wqe = __req_next_wqe(qp);
+		spin_unlock_bh(&qp->state_lock);
 		if (wqe)
-			/*
-			 * Generate an error completion for error qp state
-			 */
 			goto err;
 		else
 			goto exit;
@@ -678,8 +700,10 @@ int rxe_requester(struct rxe_qp *qp)
 		qp->req.wait_psn = 0;
 		qp->req.need_retry = 0;
 		qp->req.wait_for_rnr_timer = 0;
+		spin_unlock_bh(&qp->state_lock);
 		goto exit;
 	}
+	spin_unlock_bh(&qp->state_lock);
 
 	/* we come here if the retransmit timer has fired
 	 * or if the rnr timer has fired. If the retransmit
@@ -839,8 +863,7 @@ err:
 	/* update wqe_index for each wqe completion */
 	qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
 	wqe->state = wqe_state_error;
-	qp->attr.qp_state = IB_QPS_ERR;
-	rxe_sched_task(&qp->comp.task);
+	rxe_qp_error(qp);
 exit:
 	ret = -EAGAIN;
 out: