Diffstat (limited to 'drivers/infiniband/sw/rxe/rxe_comp.c')
-rw-r--r--	drivers/infiniband/sw/rxe/rxe_comp.c	264
1 file changed, 171 insertions, 93 deletions
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index f363fe3fa414..a5b2b62f596b 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -104,6 +104,8 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
 	case IB_WR_LOCAL_INV:			return IB_WC_LOCAL_INV;
 	case IB_WR_REG_MR:			return IB_WC_REG_MR;
 	case IB_WR_BIND_MW:			return IB_WC_BIND_MW;
+	case IB_WR_ATOMIC_WRITE:		return IB_WC_ATOMIC_WRITE;
+	case IB_WR_FLUSH:			return IB_WC_FLUSH;
 
 	default:
 		return 0xff;
@@ -112,25 +114,24 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
 
 void retransmit_timer(struct timer_list *t)
 {
-	struct rxe_qp *qp = from_timer(qp, t, retrans_timer);
+	struct rxe_qp *qp = timer_container_of(qp, t, retrans_timer);
+	unsigned long flags;
 
+	rxe_dbg_qp(qp, "retransmit timer fired\n");
+
+	spin_lock_irqsave(&qp->state_lock, flags);
 	if (qp->valid) {
 		qp->comp.timeout = 1;
-		rxe_run_task(&qp->comp.task, 1);
+		rxe_sched_task(&qp->send_task);
 	}
+	spin_unlock_irqrestore(&qp->state_lock, flags);
 }
 
 void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
 {
-	int must_sched;
-
+	rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_SENDER_SCHED);
 	skb_queue_tail(&qp->resp_pkts, skb);
-
-	must_sched = skb_queue_len(&qp->resp_pkts) > 1;
-	if (must_sched != 0)
-		rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED);
-
-	rxe_run_task(&qp->comp.task, must_sched);
+	rxe_sched_task(&qp->send_task);
 }
 
 static inline enum comp_state get_wqe(struct rxe_qp *qp,
@@ -198,6 +199,10 @@ static inline enum comp_state check_psn(struct rxe_qp *qp,
 		 */
 		if (pkt->psn == wqe->last_psn)
 			return COMPST_COMP_ACK;
+		else if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE &&
+			 (qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST ||
+			  qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE))
+			return COMPST_CHECK_ACK;
 		else
 			return COMPST_DONE;
 	} else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) {
@@ -226,6 +231,10 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 
 	case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:
 	case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
+		/* Check NAK code to handle a remote error */
+		if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE)
+			break;
+
 		if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE &&
 		    pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) {
 			/* read retries of partial data may restart from
@@ -256,12 +265,16 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 		if ((syn & AETH_TYPE_MASK) != AETH_ACK)
 			return COMPST_ERROR;
 
+		if (wqe->wr.opcode == IB_WR_ATOMIC_WRITE)
+			return COMPST_WRITE_SEND;
+
 		fallthrough;
 		/* (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE doesn't have an AETH)
 		 */
 	case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
 		if (wqe->wr.opcode != IB_WR_RDMA_READ &&
-		    wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) {
+		    wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV &&
+		    wqe->wr.opcode != IB_WR_FLUSH) {
 			wqe->status = IB_WC_FATAL_ERR;
 			return COMPST_ERROR;
 		}
@@ -303,7 +316,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 					qp->comp.psn = pkt->psn;
 					if (qp->req.wait_psn) {
 						qp->req.wait_psn = 0;
-						rxe_run_task(&qp->req.task, 0);
+						qp->req.again = 1;
 					}
 				}
 				return COMPST_ERROR_RETRY;
@@ -321,7 +334,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 				return COMPST_ERROR;
 
 			default:
-				pr_warn("unexpected nak %x\n", syn);
+				rxe_dbg_qp(qp, "unexpected nak %x\n", syn);
 				wqe->status = IB_WC_REM_OP_ERR;
 				return COMPST_ERROR;
 			}
@@ -332,7 +345,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 		break;
 
 	default:
-		pr_warn("unexpected opcode\n");
+		rxe_dbg_qp(qp, "unexpected opcode\n");
 	}
 
 	return COMPST_ERROR;
@@ -409,6 +422,10 @@ static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 				uwc->wc_flags = IB_WC_WITH_IMM;
 			uwc->byte_len = wqe->dma.length;
 		}
+	} else {
+		if (wqe->status != IB_WC_WR_FLUSH_ERR)
+			rxe_err_qp(qp, "non-flush error status = %d\n",
+				wqe->status);
 	}
 }
 
@@ -450,31 +467,19 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 	 */
 	if (qp->req.wait_fence) {
 		qp->req.wait_fence = 0;
-		rxe_run_task(&qp->req.task, 0);
+		qp->req.again = 1;
 	}
 }
 
-static inline enum comp_state complete_ack(struct rxe_qp *qp,
-					   struct rxe_pkt_info *pkt,
-					   struct rxe_send_wqe *wqe)
+static void comp_check_sq_drain_done(struct rxe_qp *qp)
 {
-	if (wqe->has_rd_atomic) {
-		wqe->has_rd_atomic = 0;
-		atomic_inc(&qp->req.rd_atomic);
-		if (qp->req.need_rd_atomic) {
-			qp->comp.timeout_retry = 0;
-			qp->req.need_rd_atomic = 0;
-			rxe_run_task(&qp->req.task, 0);
-		}
-	}
+	unsigned long flags;
 
-	if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
-		/* state_lock used by requester & completer */
-		spin_lock_bh(&qp->state_lock);
-		if ((qp->req.state == QP_STATE_DRAIN) &&
-		    (qp->comp.psn == qp->req.psn)) {
-			qp->req.state = QP_STATE_DRAINED;
-			spin_unlock_bh(&qp->state_lock);
+	spin_lock_irqsave(&qp->state_lock, flags);
+	if (unlikely(qp_state(qp) == IB_QPS_SQD)) {
+		if (qp->attr.sq_draining && qp->comp.psn == qp->req.psn) {
+			qp->attr.sq_draining = 0;
+			spin_unlock_irqrestore(&qp->state_lock, flags);
 
 			if (qp->ibqp.event_handler) {
 				struct ib_event ev;
@@ -485,10 +490,27 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp,
 				qp->ibqp.event_handler(&ev,
 					qp->ibqp.qp_context);
 			}
-		} else {
-			spin_unlock_bh(&qp->state_lock);
+			return;
 		}
 	}
+	spin_unlock_irqrestore(&qp->state_lock, flags);
+}
+
+static inline enum comp_state complete_ack(struct rxe_qp *qp,
+					   struct rxe_pkt_info *pkt,
+					   struct rxe_send_wqe *wqe)
+{
+	if (wqe->has_rd_atomic) {
+		wqe->has_rd_atomic = 0;
+		atomic_inc(&qp->req.rd_atomic);
+		if (qp->req.need_rd_atomic) {
+			qp->comp.timeout_retry = 0;
+			qp->req.need_rd_atomic = 0;
+			qp->req.again = 1;
+		}
+	}
+
+	comp_check_sq_drain_done(qp);
 
 	do_complete(qp, wqe);
 
@@ -510,7 +532,7 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,
 
 		if (qp->req.wait_psn) {
 			qp->req.wait_psn = 0;
-			rxe_run_task(&qp->req.task, 1);
+			qp->req.again = 1;
 		}
 	}
 
@@ -519,25 +541,64 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,
 	return COMPST_GET_WQE;
 }
 
-static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
+/* drain incoming response packet queue */
+static void drain_resp_pkts(struct rxe_qp *qp)
 {
 	struct sk_buff *skb;
-	struct rxe_send_wqe *wqe;
-	struct rxe_queue *q = qp->sq.queue;
 
 	while ((skb = skb_dequeue(&qp->resp_pkts))) {
-		rxe_drop_ref(qp);
+		rxe_put(qp);
 		kfree_skb(skb);
 		ib_device_put(qp->ibqp.device);
 	}
+}
+
+/* complete send wqe with flush error */
+static int flush_send_wqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+{
+	struct rxe_cqe cqe = {};
+	struct ib_wc *wc = &cqe.ibwc;
+	struct ib_uverbs_wc *uwc = &cqe.uibwc;
+	int err;
+
+	if (qp->is_user) {
+		uwc->wr_id = wqe->wr.wr_id;
+		uwc->status = IB_WC_WR_FLUSH_ERR;
+		uwc->qp_num = qp->ibqp.qp_num;
+	} else {
+		wc->wr_id = wqe->wr.wr_id;
+		wc->status = IB_WC_WR_FLUSH_ERR;
+		wc->qp = &qp->ibqp;
+	}
+
+	err = rxe_cq_post(qp->scq, &cqe, 0);
+	if (err)
+		rxe_dbg_cq(qp->scq, "post cq failed, err = %d\n", err);
+
+	return err;
+}
+
+/* drain and optionally complete the send queue
+ * if unable to complete a wqe, i.e. cq is full, stop
+ * completing and flush the remaining wqes
+ */
+static void flush_send_queue(struct rxe_qp *qp, bool notify)
+{
+	struct rxe_send_wqe *wqe;
+	struct rxe_queue *q = qp->sq.queue;
+	int err;
+
+	/* send queue never got created. nothing to do. */
+	if (!qp->sq.queue)
+		return;
 
 	while ((wqe = queue_head(q, q->type))) {
 		if (notify) {
-			wqe->status = IB_WC_WR_FLUSH_ERR;
-			do_complete(qp, wqe);
-		} else {
-			queue_advance_consumer(q, q->type);
+			err = flush_send_wqe(qp, wqe);
+			if (err)
+				notify = 0;
 		}
+		queue_advance_consumer(q, q->type);
 	}
 }
 
@@ -548,29 +609,55 @@ static void free_pkt(struct rxe_pkt_info *pkt)
 	struct ib_device *dev = qp->ibqp.device;
 
 	kfree_skb(skb);
-	rxe_drop_ref(qp);
+	rxe_put(qp);
 	ib_device_put(dev);
 }
 
-int rxe_completer(void *arg)
+/* reset the retry timer if
+ *	- QP is type RC
+ *	- there is a packet sent by the requester that
+ *	  might be acked (we still might get spurious
+ *	  timeouts but try to keep them as few as possible)
+ *	- the timeout parameter is set
+ *	- the QP is alive
+ */
+static void reset_retry_timer(struct rxe_qp *qp)
+{
+	unsigned long flags;
+
+	if (qp_type(qp) == IB_QPT_RC && qp->qp_timeout_jiffies) {
+		spin_lock_irqsave(&qp->state_lock, flags);
+		if (qp_state(qp) >= IB_QPS_RTS &&
+		    psn_compare(qp->req.psn, qp->comp.psn) > 0)
+			mod_timer(&qp->retrans_timer,
+				  jiffies + qp->qp_timeout_jiffies);
+		spin_unlock_irqrestore(&qp->state_lock, flags);
+	}
+}
+
+int rxe_completer(struct rxe_qp *qp)
 {
-	struct rxe_qp *qp = (struct rxe_qp *)arg;
 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 	struct rxe_send_wqe *wqe = NULL;
 	struct sk_buff *skb = NULL;
 	struct rxe_pkt_info *pkt = NULL;
 	enum comp_state state;
-	int ret = 0;
+	int ret;
+	unsigned long flags;
 
-	rxe_add_ref(qp);
+	qp->req.again = 0;
 
-	if (!qp->valid || qp->req.state == QP_STATE_ERROR ||
-	    qp->req.state == QP_STATE_RESET) {
-		rxe_drain_resp_pkts(qp, qp->valid &&
-				    qp->req.state == QP_STATE_ERROR);
-		ret = -EAGAIN;
-		goto done;
+	spin_lock_irqsave(&qp->state_lock, flags);
+	if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
+	    qp_state(qp) == IB_QPS_RESET) {
+		bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);
+
+		drain_resp_pkts(qp);
+		flush_send_queue(qp, notify);
+		spin_unlock_irqrestore(&qp->state_lock, flags);
+		goto exit;
 	}
+	spin_unlock_irqrestore(&qp->state_lock, flags);
 
 	if (qp->comp.timeout) {
 		qp->comp.timeout_retry = 1;
@@ -579,16 +666,13 @@ int rxe_completer(void *arg)
 		qp->comp.timeout_retry = 0;
 	}
 
-	if (qp->req.need_retry) {
-		ret = -EAGAIN;
-		goto done;
-	}
+	if (qp->req.need_retry)
+		goto exit;
 
 	state = COMPST_GET_ACK;
 
 	while (1) {
-		pr_debug("qp#%d state = %s\n", qp_num(qp),
-			 comp_state_name[state]);
+		rxe_dbg_qp(qp, "state = %s\n", comp_state_name[state]);
 		switch (state) {
 		case COMPST_GET_ACK:
 			skb = skb_dequeue(&qp->resp_pkts);
@@ -646,7 +730,7 @@ int rxe_completer(void *arg)
 
 			if (qp->req.wait_psn) {
 				qp->req.wait_psn = 0;
-				rxe_run_task(&qp->req.task, 1);
+				qp->req.again = 1;
 			}
 
 			state = COMPST_DONE;
@@ -661,22 +745,8 @@ int rxe_completer(void *arg)
 				break;
 			}
 
-			/* re reset the timeout counter if
-			 * (1) QP is type RC
-			 * (2) the QP is alive
-			 * (3) there is a packet sent by the requester that
-			 *     might be acked (we still might get spurious
-			 *     timeouts but try to keep them as few as possible)
-			 * (4) the timeout parameter is set
-			 */
-			if ((qp_type(qp) == IB_QPT_RC) &&
-			    (qp->req.state == QP_STATE_READY) &&
-			    (psn_compare(qp->req.psn, qp->comp.psn) > 0) &&
-			    qp->qp_timeout_jiffies)
-				mod_timer(&qp->retrans_timer,
-					  jiffies + qp->qp_timeout_jiffies);
-			ret = -EAGAIN;
-			goto done;
+			reset_retry_timer(qp);
+			goto exit;
 
 		case COMPST_ERROR_RETRY:
 			/* we come here if the retry timer fired and we did
@@ -688,10 +758,8 @@ int rxe_completer(void *arg)
 			 */
 
 			/* there is nothing to retry in this case */
-			if (!wqe || (wqe->state == wqe_state_posted)) {
-				ret = -EAGAIN;
-				goto done;
-			}
+			if (!wqe || (wqe->state == wqe_state_posted))
+				goto exit;
 
 			/* if we've started a retry, don't start another
 			 * retry sequence, unless this is a timeout.
@@ -717,7 +785,7 @@ int rxe_completer(void *arg)
 						RXE_CNT_COMP_RETRY);
 				qp->req.need_retry = 1;
 				qp->comp.started_retry = 1;
-				rxe_run_task(&qp->req.task, 0);
+				qp->req.again = 1;
 			}
 			goto done;
 
@@ -729,18 +797,21 @@ int rxe_completer(void *arg)
 			break;
 
 		case COMPST_RNR_RETRY:
+			/* we come here if we received an RNR NAK */
 			if (qp->comp.rnr_retry > 0) {
 				if (qp->comp.rnr_retry != 7)
 					qp->comp.rnr_retry--;
 
-				qp->req.need_retry = 1;
-				pr_debug("qp#%d set rnr nak timer\n",
-					 qp_num(qp));
+				/* don't start a retry flow until the
+				 * rnr timer has fired
+				 */
+				qp->req.wait_for_rnr_timer = 1;
+				rxe_dbg_qp(qp, "set rnr nak timer\n");
+				// TODO who protects from destroy_qp??
 				mod_timer(&qp->rnr_nak_timer,
 					  jiffies + rnrnak_jiffies(aeth_syn(pkt)
 						& ~AETH_TYPE_MASK));
-				ret = -EAGAIN;
-				goto done;
+				goto exit;
 			} else {
 				rxe_counter_inc(rxe,
 						RXE_CNT_RNR_RETRY_EXCEEDED);
@@ -753,15 +824,22 @@ int rxe_completer(void *arg)
 			WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS);
 			do_complete(qp, wqe);
 			rxe_qp_error(qp);
-			ret = -EAGAIN;
-			goto done;
+			goto exit;
 		}
 	}
 
+	/* A non-zero return value will cause rxe_do_task to
+	 * exit its loop and end the work item. A zero return
+	 * will continue looping and return to rxe_completer
+	 */
 done:
+	ret = 0;
+	goto out;
+exit:
+	ret = (qp->req.again) ? 0 : -EAGAIN;
+out:
+	qp->req.again = 0;
 	if (pkt)
 		free_pkt(pkt);
-	rxe_drop_ref(qp);
-
 	return ret;
 }
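
The comment added above the done: label in the diff describes the contract between rxe_completer() and the task loop that drives it: a zero return asks the loop to keep calling the handler, while a non-zero value (here -EAGAIN) ends the work item. The standalone C program below is only a sketch of that contract; it is not the kernel's rxe_task.c code, and run_task() and fake_completer() are hypothetical stand-ins used purely for illustration.

/* Userspace sketch of the completer return convention: the handler
 * returns 0 while it still has work (keep looping) and a non-zero
 * value such as -EAGAIN when it is idle (stop the work item).
 * NOT the kernel implementation; names here are made up.
 */
#include <errno.h>
#include <stdio.h>

static int pkts_left = 4;	/* pretend a few response packets are queued */

/* models rxe_completer(): 0 = did work, -EAGAIN = nothing to do */
static int fake_completer(void)
{
	if (pkts_left > 0) {
		printf("processed packet, %d left\n", --pkts_left);
		return 0;
	}
	return -EAGAIN;
}

/* models the driving loop: keep calling the handler until it asks to stop */
static void run_task(int (*handler)(void))
{
	int iterations = 0;

	while (handler() == 0)
		iterations++;

	printf("task exits after %d iterations\n", iterations);
}

int main(void)
{
	run_task(fake_completer);
	return 0;
}

Keeping the stop/continue decision in the return value lets the caller own the loop policy (budget, rescheduling, fairness) while the completer only reports whether it made progress, which is the effect the done/exit/out labels in the patch achieve.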
