Diffstat (limited to 'drivers/infiniband/sw/rxe/rxe_comp.c')
-rw-r--r--	drivers/infiniband/sw/rxe/rxe_comp.c	321
1 file changed, 198 insertions(+), 123 deletions(-)
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 58ad9c2644f3..a5b2b62f596b 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -104,6 +104,8 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV;
case IB_WR_REG_MR: return IB_WC_REG_MR;
case IB_WR_BIND_MW: return IB_WC_BIND_MW;
+ case IB_WR_ATOMIC_WRITE: return IB_WC_ATOMIC_WRITE;
+ case IB_WR_FLUSH: return IB_WC_FLUSH;
default:
return 0xff;
@@ -112,25 +114,24 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
void retransmit_timer(struct timer_list *t)
{
- struct rxe_qp *qp = from_timer(qp, t, retrans_timer);
+ struct rxe_qp *qp = timer_container_of(qp, t, retrans_timer);
+ unsigned long flags;
+
+ rxe_dbg_qp(qp, "retransmit timer fired\n");
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp->valid) {
qp->comp.timeout = 1;
- rxe_run_task(&qp->comp.task, 1);
+ rxe_sched_task(&qp->send_task);
}
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
{
- int must_sched;
-
+ rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_SENDER_SCHED);
skb_queue_tail(&qp->resp_pkts, skb);
-
- must_sched = skb_queue_len(&qp->resp_pkts) > 1;
- if (must_sched != 0)
- rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED);
-
- rxe_run_task(&qp->comp.task, must_sched);
+ rxe_sched_task(&qp->send_task);
}
static inline enum comp_state get_wqe(struct rxe_qp *qp,
@@ -142,10 +143,7 @@ static inline enum comp_state get_wqe(struct rxe_qp *qp,
/* we come here whether or not we found a response packet to see if
* there are any posted WQEs
*/
- if (qp->is_user)
- wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_USER);
- else
- wqe = queue_head(qp->sq.queue, QUEUE_TYPE_KERNEL);
+ wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT);
*wqe_p = wqe;
/* no WQE or requester has not started it yet */
@@ -201,6 +199,10 @@ static inline enum comp_state check_psn(struct rxe_qp *qp,
*/
if (pkt->psn == wqe->last_psn)
return COMPST_COMP_ACK;
+ else if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE &&
+ (qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST ||
+ qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE))
+ return COMPST_CHECK_ACK;
else
return COMPST_DONE;
} else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) {
@@ -229,6 +231,10 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:
case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
+ /* Check NAK code to handle a remote error */
+ if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE)
+ break;
+
if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE &&
pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) {
/* read retries of partial data may restart from
@@ -259,12 +265,16 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
if ((syn & AETH_TYPE_MASK) != AETH_ACK)
return COMPST_ERROR;
+ if (wqe->wr.opcode == IB_WR_ATOMIC_WRITE)
+ return COMPST_WRITE_SEND;
+
fallthrough;
/* (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE doesn't have an AETH)
*/
case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
if (wqe->wr.opcode != IB_WR_RDMA_READ &&
- wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) {
+ wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV &&
+ wqe->wr.opcode != IB_WR_FLUSH) {
wqe->status = IB_WC_FATAL_ERR;
return COMPST_ERROR;
}
@@ -306,7 +316,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
qp->comp.psn = pkt->psn;
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_run_task(&qp->req.task, 0);
+ qp->req.again = 1;
}
}
return COMPST_ERROR_RETRY;
@@ -324,7 +334,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
return COMPST_ERROR;
default:
- pr_warn("unexpected nak %x\n", syn);
+ rxe_dbg_qp(qp, "unexpected nak %x\n", syn);
wqe->status = IB_WC_REM_OP_ERR;
return COMPST_ERROR;
}
@@ -335,7 +345,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
break;
default:
- pr_warn("unexpected opcode\n");
+ rxe_dbg_qp(qp, "unexpected opcode\n");
}
return COMPST_ERROR;
@@ -349,7 +359,7 @@ static inline enum comp_state do_read(struct rxe_qp *qp,
ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
&wqe->dma, payload_addr(pkt),
- payload_size(pkt), RXE_TO_MR_OBJ, NULL);
+ payload_size(pkt), RXE_TO_MR_OBJ);
if (ret) {
wqe->status = IB_WC_LOC_PROT_ERR;
return COMPST_ERROR;
@@ -371,7 +381,7 @@ static inline enum comp_state do_atomic(struct rxe_qp *qp,
ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
&wqe->dma, &atomic_orig,
- sizeof(u64), RXE_TO_MR_OBJ, NULL);
+ sizeof(u64), RXE_TO_MR_OBJ);
if (ret) {
wqe->status = IB_WC_LOC_PROT_ERR;
return COMPST_ERROR;
@@ -383,30 +393,39 @@ static inline enum comp_state do_atomic(struct rxe_qp *qp,
static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
struct rxe_cqe *cqe)
{
+ struct ib_wc *wc = &cqe->ibwc;
+ struct ib_uverbs_wc *uwc = &cqe->uibwc;
+
memset(cqe, 0, sizeof(*cqe));
if (!qp->is_user) {
- struct ib_wc *wc = &cqe->ibwc;
-
- wc->wr_id = wqe->wr.wr_id;
- wc->status = wqe->status;
- wc->opcode = wr_to_wc_opcode(wqe->wr.opcode);
- if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
- wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
- wc->wc_flags = IB_WC_WITH_IMM;
- wc->byte_len = wqe->dma.length;
- wc->qp = &qp->ibqp;
+ wc->wr_id = wqe->wr.wr_id;
+ wc->status = wqe->status;
+ wc->qp = &qp->ibqp;
} else {
- struct ib_uverbs_wc *uwc = &cqe->uibwc;
-
- uwc->wr_id = wqe->wr.wr_id;
- uwc->status = wqe->status;
- uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode);
- if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
- wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
- uwc->wc_flags = IB_WC_WITH_IMM;
- uwc->byte_len = wqe->dma.length;
- uwc->qp_num = qp->ibqp.qp_num;
+ uwc->wr_id = wqe->wr.wr_id;
+ uwc->status = wqe->status;
+ uwc->qp_num = qp->ibqp.qp_num;
+ }
+
+ if (wqe->status == IB_WC_SUCCESS) {
+ if (!qp->is_user) {
+ wc->opcode = wr_to_wc_opcode(wqe->wr.opcode);
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+ wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
+ wc->wc_flags = IB_WC_WITH_IMM;
+ wc->byte_len = wqe->dma.length;
+ } else {
+ uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode);
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+ wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
+ uwc->wc_flags = IB_WC_WITH_IMM;
+ uwc->byte_len = wqe->dma.length;
+ }
+ } else {
+ if (wqe->status != IB_WC_WR_FLUSH_ERR)
+ rxe_err_qp(qp, "non-flush error status = %d\n",
+ wqe->status);
}
}
@@ -432,10 +451,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
if (post)
make_send_cqe(qp, wqe, &cqe);
- if (qp->is_user)
- advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_USER);
- else
- advance_consumer(qp->sq.queue, QUEUE_TYPE_KERNEL);
+ queue_advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT);
if (post)
rxe_cq_post(qp->scq, &cqe, 0);
@@ -451,32 +467,18 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
*/
if (qp->req.wait_fence) {
qp->req.wait_fence = 0;
- rxe_run_task(&qp->req.task, 0);
+ qp->req.again = 1;
}
}
-static inline enum comp_state complete_ack(struct rxe_qp *qp,
- struct rxe_pkt_info *pkt,
- struct rxe_send_wqe *wqe)
+static void comp_check_sq_drain_done(struct rxe_qp *qp)
{
unsigned long flags;
- if (wqe->has_rd_atomic) {
- wqe->has_rd_atomic = 0;
- atomic_inc(&qp->req.rd_atomic);
- if (qp->req.need_rd_atomic) {
- qp->comp.timeout_retry = 0;
- qp->req.need_rd_atomic = 0;
- rxe_run_task(&qp->req.task, 0);
- }
- }
-
- if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
- /* state_lock used by requester & completer */
- spin_lock_irqsave(&qp->state_lock, flags);
- if ((qp->req.state == QP_STATE_DRAIN) &&
- (qp->comp.psn == qp->req.psn)) {
- qp->req.state = QP_STATE_DRAINED;
+ spin_lock_irqsave(&qp->state_lock, flags);
+ if (unlikely(qp_state(qp) == IB_QPS_SQD)) {
+ if (qp->attr.sq_draining && qp->comp.psn == qp->req.psn) {
+ qp->attr.sq_draining = 0;
spin_unlock_irqrestore(&qp->state_lock, flags);
if (qp->ibqp.event_handler) {
@@ -488,11 +490,28 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp,
qp->ibqp.event_handler(&ev,
qp->ibqp.qp_context);
}
- } else {
- spin_unlock_irqrestore(&qp->state_lock, flags);
+ return;
+ }
+ }
+ spin_unlock_irqrestore(&qp->state_lock, flags);
+}
+
+static inline enum comp_state complete_ack(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt,
+ struct rxe_send_wqe *wqe)
+{
+ if (wqe->has_rd_atomic) {
+ wqe->has_rd_atomic = 0;
+ atomic_inc(&qp->req.rd_atomic);
+ if (qp->req.need_rd_atomic) {
+ qp->comp.timeout_retry = 0;
+ qp->req.need_rd_atomic = 0;
+ qp->req.again = 1;
}
}
+ comp_check_sq_drain_done(qp);
+
do_complete(qp, wqe);
if (psn_compare(pkt->psn, qp->comp.psn) >= 0)
@@ -513,7 +532,7 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_run_task(&qp->req.task, 1);
+ qp->req.again = 1;
}
}
@@ -522,25 +541,64 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,
return COMPST_GET_WQE;
}
-static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
+/* drain incoming response packet queue */
+static void drain_resp_pkts(struct rxe_qp *qp)
{
struct sk_buff *skb;
- struct rxe_send_wqe *wqe;
- struct rxe_queue *q = qp->sq.queue;
while ((skb = skb_dequeue(&qp->resp_pkts))) {
- rxe_drop_ref(qp);
+ rxe_put(qp);
kfree_skb(skb);
ib_device_put(qp->ibqp.device);
}
+}
+
+/* complete send wqe with flush error */
+static int flush_send_wqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+{
+ struct rxe_cqe cqe = {};
+ struct ib_wc *wc = &cqe.ibwc;
+ struct ib_uverbs_wc *uwc = &cqe.uibwc;
+ int err;
+
+ if (qp->is_user) {
+ uwc->wr_id = wqe->wr.wr_id;
+ uwc->status = IB_WC_WR_FLUSH_ERR;
+ uwc->qp_num = qp->ibqp.qp_num;
+ } else {
+ wc->wr_id = wqe->wr.wr_id;
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ wc->qp = &qp->ibqp;
+ }
+
+ err = rxe_cq_post(qp->scq, &cqe, 0);
+ if (err)
+ rxe_dbg_cq(qp->scq, "post cq failed, err = %d\n", err);
+
+ return err;
+}
+
+/* drain and optionally complete the send queue
+ * if unable to complete a wqe, i.e. cq is full, stop
+ * completing and flush the remaining wqes
+ */
+static void flush_send_queue(struct rxe_qp *qp, bool notify)
+{
+ struct rxe_send_wqe *wqe;
+ struct rxe_queue *q = qp->sq.queue;
+ int err;
+
+ /* send queue never got created. nothing to do. */
+ if (!qp->sq.queue)
+ return;
while ((wqe = queue_head(q, q->type))) {
if (notify) {
- wqe->status = IB_WC_WR_FLUSH_ERR;
- do_complete(qp, wqe);
- } else {
- advance_consumer(q, q->type);
+ err = flush_send_wqe(qp, wqe);
+ if (err)
+ notify = 0;
}
+ queue_advance_consumer(q, q->type);
}
}
@@ -551,29 +609,55 @@ static void free_pkt(struct rxe_pkt_info *pkt)
struct ib_device *dev = qp->ibqp.device;
kfree_skb(skb);
- rxe_drop_ref(qp);
+ rxe_put(qp);
ib_device_put(dev);
}
-int rxe_completer(void *arg)
+/* reset the retry timer if
+ * - QP is type RC
+ * - there is a packet sent by the requester that
+ * might be acked (we still might get spurious
+ * timeouts but try to keep them as few as possible)
+ * - the timeout parameter is set
+ * - the QP is alive
+ */
+static void reset_retry_timer(struct rxe_qp *qp)
+{
+ unsigned long flags;
+
+ if (qp_type(qp) == IB_QPT_RC && qp->qp_timeout_jiffies) {
+ spin_lock_irqsave(&qp->state_lock, flags);
+ if (qp_state(qp) >= IB_QPS_RTS &&
+ psn_compare(qp->req.psn, qp->comp.psn) > 0)
+ mod_timer(&qp->retrans_timer,
+ jiffies + qp->qp_timeout_jiffies);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
+ }
+}
+
+int rxe_completer(struct rxe_qp *qp)
{
- struct rxe_qp *qp = (struct rxe_qp *)arg;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_send_wqe *wqe = NULL;
struct sk_buff *skb = NULL;
struct rxe_pkt_info *pkt = NULL;
enum comp_state state;
- int ret = 0;
+ int ret;
+ unsigned long flags;
+
+ qp->req.again = 0;
- rxe_add_ref(qp);
+ spin_lock_irqsave(&qp->state_lock, flags);
+ if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
+ qp_state(qp) == IB_QPS_RESET) {
+ bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);
- if (!qp->valid || qp->req.state == QP_STATE_ERROR ||
- qp->req.state == QP_STATE_RESET) {
- rxe_drain_resp_pkts(qp, qp->valid &&
- qp->req.state == QP_STATE_ERROR);
- ret = -EAGAIN;
- goto done;
+ drain_resp_pkts(qp);
+ flush_send_queue(qp, notify);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
+ goto exit;
}
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (qp->comp.timeout) {
qp->comp.timeout_retry = 1;
@@ -582,16 +666,13 @@ int rxe_completer(void *arg)
qp->comp.timeout_retry = 0;
}
- if (qp->req.need_retry) {
- ret = -EAGAIN;
- goto done;
- }
+ if (qp->req.need_retry)
+ goto exit;
state = COMPST_GET_ACK;
while (1) {
- pr_debug("qp#%d state = %s\n", qp_num(qp),
- comp_state_name[state]);
+ rxe_dbg_qp(qp, "state = %s\n", comp_state_name[state]);
switch (state) {
case COMPST_GET_ACK:
skb = skb_dequeue(&qp->resp_pkts);
@@ -649,7 +730,7 @@ int rxe_completer(void *arg)
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_run_task(&qp->req.task, 1);
+ qp->req.again = 1;
}
state = COMPST_DONE;
@@ -664,22 +745,8 @@ int rxe_completer(void *arg)
break;
}
- /* re reset the timeout counter if
- * (1) QP is type RC
- * (2) the QP is alive
- * (3) there is a packet sent by the requester that
- * might be acked (we still might get spurious
- * timeouts but try to keep them as few as possible)
- * (4) the timeout parameter is set
- */
- if ((qp_type(qp) == IB_QPT_RC) &&
- (qp->req.state == QP_STATE_READY) &&
- (psn_compare(qp->req.psn, qp->comp.psn) > 0) &&
- qp->qp_timeout_jiffies)
- mod_timer(&qp->retrans_timer,
- jiffies + qp->qp_timeout_jiffies);
- ret = -EAGAIN;
- goto done;
+ reset_retry_timer(qp);
+ goto exit;
case COMPST_ERROR_RETRY:
/* we come here if the retry timer fired and we did
@@ -691,10 +758,8 @@ int rxe_completer(void *arg)
*/
/* there is nothing to retry in this case */
- if (!wqe || (wqe->state == wqe_state_posted)) {
- ret = -EAGAIN;
- goto done;
- }
+ if (!wqe || (wqe->state == wqe_state_posted))
+ goto exit;
/* if we've started a retry, don't start another
* retry sequence, unless this is a timeout.
@@ -720,7 +785,7 @@ int rxe_completer(void *arg)
RXE_CNT_COMP_RETRY);
qp->req.need_retry = 1;
qp->comp.started_retry = 1;
- rxe_run_task(&qp->req.task, 0);
+ qp->req.again = 1;
}
goto done;
@@ -732,18 +797,21 @@ int rxe_completer(void *arg)
break;
case COMPST_RNR_RETRY:
+ /* we come here if we received an RNR NAK */
if (qp->comp.rnr_retry > 0) {
if (qp->comp.rnr_retry != 7)
qp->comp.rnr_retry--;
- qp->req.need_retry = 1;
- pr_debug("qp#%d set rnr nak timer\n",
- qp_num(qp));
+ /* don't start a retry flow until the
+ * rnr timer has fired
+ */
+ qp->req.wait_for_rnr_timer = 1;
+ rxe_dbg_qp(qp, "set rnr nak timer\n");
+ // TODO who protects from destroy_qp??
mod_timer(&qp->rnr_nak_timer,
jiffies + rnrnak_jiffies(aeth_syn(pkt)
& ~AETH_TYPE_MASK));
- ret = -EAGAIN;
- goto done;
+ goto exit;
} else {
rxe_counter_inc(rxe,
RXE_CNT_RNR_RETRY_EXCEEDED);
@@ -756,15 +824,22 @@ int rxe_completer(void *arg)
WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS);
do_complete(qp, wqe);
rxe_qp_error(qp);
- ret = -EAGAIN;
- goto done;
+ goto exit;
}
}
+ /* A non-zero return value will cause rxe_do_task to
+ * exit its loop and end the work item. A zero return
+ * will continue looping and return to rxe_completer
+ */
done:
+ ret = 0;
+ goto out;
+exit:
+ ret = (qp->req.again) ? 0 : -EAGAIN;
+out:
+ qp->req.again = 0;
if (pkt)
free_pkt(pkt);
- rxe_drop_ref(qp);
-
return ret;
}