Diffstat (limited to 'drivers/infiniband/sw/rxe/rxe_comp.c')
| -rw-r--r-- | drivers/infiniband/sw/rxe/rxe_comp.c | 321 |
1 file changed, 198 insertions, 123 deletions
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 58ad9c2644f3..a5b2b62f596b 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -104,6 +104,8 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
 	case IB_WR_LOCAL_INV:			return IB_WC_LOCAL_INV;
 	case IB_WR_REG_MR:			return IB_WC_REG_MR;
 	case IB_WR_BIND_MW:			return IB_WC_BIND_MW;
+	case IB_WR_ATOMIC_WRITE:		return IB_WC_ATOMIC_WRITE;
+	case IB_WR_FLUSH:			return IB_WC_FLUSH;
 
 	default:
 		return 0xff;
@@ -112,25 +114,24 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
 void retransmit_timer(struct timer_list *t)
 {
-	struct rxe_qp *qp = from_timer(qp, t, retrans_timer);
+	struct rxe_qp *qp = timer_container_of(qp, t, retrans_timer);
+	unsigned long flags;
+
+	rxe_dbg_qp(qp, "retransmit timer fired\n");
 
+	spin_lock_irqsave(&qp->state_lock, flags);
 	if (qp->valid) {
 		qp->comp.timeout = 1;
-		rxe_run_task(&qp->comp.task, 1);
+		rxe_sched_task(&qp->send_task);
 	}
+	spin_unlock_irqrestore(&qp->state_lock, flags);
 }
 
 void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
 {
-	int must_sched;
-
+	rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_SENDER_SCHED);
 	skb_queue_tail(&qp->resp_pkts, skb);
-
-	must_sched = skb_queue_len(&qp->resp_pkts) > 1;
-	if (must_sched != 0)
-		rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED);
-
-	rxe_run_task(&qp->comp.task, must_sched);
+	rxe_sched_task(&qp->send_task);
 }
 
 static inline enum comp_state get_wqe(struct rxe_qp *qp,
@@ -142,10 +143,7 @@ static inline enum comp_state get_wqe(struct rxe_qp *qp,
 	/* we come here whether or not we found a response packet to see if
 	 * there are any posted WQEs
 	 */
-	if (qp->is_user)
-		wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_USER);
-	else
-		wqe = queue_head(qp->sq.queue, QUEUE_TYPE_KERNEL);
+	wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT);
 	*wqe_p = wqe;
 
 	/* no WQE or requester has not started it yet */
@@ -201,6 +199,10 @@ static inline enum comp_state check_psn(struct rxe_qp *qp,
 		 */
 		if (pkt->psn == wqe->last_psn)
 			return COMPST_COMP_ACK;
+		else if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE &&
+			 (qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST ||
+			  qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE))
+			return COMPST_CHECK_ACK;
 		else
 			return COMPST_DONE;
 	} else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) {
@@ -229,6 +231,10 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 
 	case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:
 	case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
+		/* Check NAK code to handle a remote error */
+		if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE)
+			break;
+
 		if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE &&
 		    pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) {
 			/* read retries of partial data may restart from
@@ -259,12 +265,16 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 		if ((syn & AETH_TYPE_MASK) != AETH_ACK)
 			return COMPST_ERROR;
 
+		if (wqe->wr.opcode == IB_WR_ATOMIC_WRITE)
+			return COMPST_WRITE_SEND;
+
 		fallthrough;
 		/* (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE doesn't have an AETH)
 		 */
 	case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
 		if (wqe->wr.opcode != IB_WR_RDMA_READ &&
-		    wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) {
+		    wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV &&
+		    wqe->wr.opcode != IB_WR_FLUSH) {
 			wqe->status = IB_WC_FATAL_ERR;
 			return COMPST_ERROR;
 		}
@@ -306,7 +316,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 			qp->comp.psn = pkt->psn;
 			if (qp->req.wait_psn) {
 				qp->req.wait_psn = 0;
-				rxe_run_task(&qp->req.task, 0);
+				qp->req.again = 1;
 			}
 		}
 		return COMPST_ERROR_RETRY;
@@ -324,7 +334,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 			return COMPST_ERROR;
 
 		default:
-			pr_warn("unexpected nak %x\n", syn);
+			rxe_dbg_qp(qp, "unexpected nak %x\n", syn);
 			wqe->status = IB_WC_REM_OP_ERR;
 			return COMPST_ERROR;
 		}
@@ -335,7 +345,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 		break;
 
 	default:
-		pr_warn("unexpected opcode\n");
+		rxe_dbg_qp(qp, "unexpected opcode\n");
 	}
 
 	return COMPST_ERROR;
@@ -349,7 +359,7 @@ static inline enum comp_state do_read(struct rxe_qp *qp,
 
 	ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
 			&wqe->dma, payload_addr(pkt),
-			payload_size(pkt), RXE_TO_MR_OBJ, NULL);
+			payload_size(pkt), RXE_TO_MR_OBJ);
 	if (ret) {
 		wqe->status = IB_WC_LOC_PROT_ERR;
 		return COMPST_ERROR;
@@ -371,7 +381,7 @@ static inline enum comp_state do_atomic(struct rxe_qp *qp,
 
 	ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
 			&wqe->dma, &atomic_orig,
-			sizeof(u64), RXE_TO_MR_OBJ, NULL);
+			sizeof(u64), RXE_TO_MR_OBJ);
 	if (ret) {
 		wqe->status = IB_WC_LOC_PROT_ERR;
 		return COMPST_ERROR;
@@ -383,30 +393,39 @@ static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 			  struct rxe_cqe *cqe)
 {
+	struct ib_wc *wc = &cqe->ibwc;
+	struct ib_uverbs_wc *uwc = &cqe->uibwc;
+
 	memset(cqe, 0, sizeof(*cqe));
 
 	if (!qp->is_user) {
-		struct ib_wc *wc = &cqe->ibwc;
-
-		wc->wr_id = wqe->wr.wr_id;
-		wc->status = wqe->status;
-		wc->opcode = wr_to_wc_opcode(wqe->wr.opcode);
-		if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
-		    wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
-			wc->wc_flags = IB_WC_WITH_IMM;
-		wc->byte_len = wqe->dma.length;
-		wc->qp = &qp->ibqp;
+		wc->wr_id = wqe->wr.wr_id;
+		wc->status = wqe->status;
+		wc->qp = &qp->ibqp;
 	} else {
-		struct ib_uverbs_wc *uwc = &cqe->uibwc;
-
-		uwc->wr_id = wqe->wr.wr_id;
-		uwc->status = wqe->status;
-		uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode);
-		if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
-		    wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
-			uwc->wc_flags = IB_WC_WITH_IMM;
-		uwc->byte_len = wqe->dma.length;
-		uwc->qp_num = qp->ibqp.qp_num;
+		uwc->wr_id = wqe->wr.wr_id;
+		uwc->status = wqe->status;
+		uwc->qp_num = qp->ibqp.qp_num;
+	}
+
+	if (wqe->status == IB_WC_SUCCESS) {
+		if (!qp->is_user) {
+			wc->opcode = wr_to_wc_opcode(wqe->wr.opcode);
+			if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+			    wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
+				wc->wc_flags = IB_WC_WITH_IMM;
+			wc->byte_len = wqe->dma.length;
+		} else {
+			uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode);
+			if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+			    wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
+				uwc->wc_flags = IB_WC_WITH_IMM;
+			uwc->byte_len = wqe->dma.length;
+		}
+	} else {
+		if (wqe->status != IB_WC_WR_FLUSH_ERR)
+			rxe_err_qp(qp, "non-flush error status = %d\n",
+				wqe->status);
 	}
 }
 
@@ -432,10 +451,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 	if (post)
 		make_send_cqe(qp, wqe, &cqe);
 
-	if (qp->is_user)
-		advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_USER);
-	else
-		advance_consumer(qp->sq.queue, QUEUE_TYPE_KERNEL);
+	queue_advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT);
 
 	if (post)
 		rxe_cq_post(qp->scq, &cqe, 0);
@@ -451,32 +467,18 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 	 */
 	if (qp->req.wait_fence) {
 		qp->req.wait_fence = 0;
-		rxe_run_task(&qp->req.task, 0);
+		qp->req.again = 1;
 	}
 }
 
-static inline enum comp_state complete_ack(struct rxe_qp *qp,
-					   struct rxe_pkt_info *pkt,
-					   struct rxe_send_wqe *wqe)
+static void comp_check_sq_drain_done(struct rxe_qp *qp)
 {
 	unsigned long flags;
 
-	if (wqe->has_rd_atomic) {
-		wqe->has_rd_atomic = 0;
-		atomic_inc(&qp->req.rd_atomic);
-		if (qp->req.need_rd_atomic) {
-			qp->comp.timeout_retry = 0;
-			qp->req.need_rd_atomic = 0;
-			rxe_run_task(&qp->req.task, 0);
-		}
-	}
-
-	if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
-		/* state_lock used by requester & completer */
-		spin_lock_irqsave(&qp->state_lock, flags);
-		if ((qp->req.state == QP_STATE_DRAIN) &&
-		    (qp->comp.psn == qp->req.psn)) {
-			qp->req.state = QP_STATE_DRAINED;
+	spin_lock_irqsave(&qp->state_lock, flags);
+	if (unlikely(qp_state(qp) == IB_QPS_SQD)) {
+		if (qp->attr.sq_draining && qp->comp.psn == qp->req.psn) {
+			qp->attr.sq_draining = 0;
 			spin_unlock_irqrestore(&qp->state_lock, flags);
 
 			if (qp->ibqp.event_handler) {
@@ -488,11 +490,28 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp,
 				qp->ibqp.event_handler(&ev,
 					qp->ibqp.qp_context);
 			}
-		} else {
-			spin_unlock_irqrestore(&qp->state_lock, flags);
+			return;
+		}
+	}
+	spin_unlock_irqrestore(&qp->state_lock, flags);
+}
+
+static inline enum comp_state complete_ack(struct rxe_qp *qp,
+					   struct rxe_pkt_info *pkt,
+					   struct rxe_send_wqe *wqe)
+{
+	if (wqe->has_rd_atomic) {
+		wqe->has_rd_atomic = 0;
+		atomic_inc(&qp->req.rd_atomic);
+		if (qp->req.need_rd_atomic) {
+			qp->comp.timeout_retry = 0;
+			qp->req.need_rd_atomic = 0;
+			qp->req.again = 1;
 		}
 	}
 
+	comp_check_sq_drain_done(qp);
+
 	do_complete(qp, wqe);
 
 	if (psn_compare(pkt->psn, qp->comp.psn) >= 0)
@@ -513,7 +532,7 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,
 
 		if (qp->req.wait_psn) {
 			qp->req.wait_psn = 0;
-			rxe_run_task(&qp->req.task, 1);
+			qp->req.again = 1;
 		}
 	}
 
@@ -522,25 +541,64 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,
 	return COMPST_GET_WQE;
 }
 
-static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
+/* drain incoming response packet queue */
+static void drain_resp_pkts(struct rxe_qp *qp)
 {
 	struct sk_buff *skb;
-	struct rxe_send_wqe *wqe;
-	struct rxe_queue *q = qp->sq.queue;
 
 	while ((skb = skb_dequeue(&qp->resp_pkts))) {
-		rxe_drop_ref(qp);
+		rxe_put(qp);
 		kfree_skb(skb);
 		ib_device_put(qp->ibqp.device);
 	}
+}
+
+/* complete send wqe with flush error */
+static int flush_send_wqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+{
+	struct rxe_cqe cqe = {};
+	struct ib_wc *wc = &cqe.ibwc;
+	struct ib_uverbs_wc *uwc = &cqe.uibwc;
+	int err;
+
+	if (qp->is_user) {
+		uwc->wr_id = wqe->wr.wr_id;
+		uwc->status = IB_WC_WR_FLUSH_ERR;
+		uwc->qp_num = qp->ibqp.qp_num;
+	} else {
+		wc->wr_id = wqe->wr.wr_id;
+		wc->status = IB_WC_WR_FLUSH_ERR;
+		wc->qp = &qp->ibqp;
+	}
+
+	err = rxe_cq_post(qp->scq, &cqe, 0);
+	if (err)
+		rxe_dbg_cq(qp->scq, "post cq failed, err = %d\n", err);
+
+	return err;
+}
+
+/* drain and optionally complete the send queue
+ * if unable to complete a wqe, i.e. cq is full, stop
+ * completing and flush the remaining wqes
+ */
+static void flush_send_queue(struct rxe_qp *qp, bool notify)
+{
+	struct rxe_send_wqe *wqe;
+	struct rxe_queue *q = qp->sq.queue;
+	int err;
+
+	/* send queue never got created. nothing to do. */
+	if (!qp->sq.queue)
+		return;
 
 	while ((wqe = queue_head(q, q->type))) {
 		if (notify) {
-			wqe->status = IB_WC_WR_FLUSH_ERR;
-			do_complete(qp, wqe);
-		} else {
-			advance_consumer(q, q->type);
+			err = flush_send_wqe(qp, wqe);
+			if (err)
+				notify = 0;
 		}
+		queue_advance_consumer(q, q->type);
 	}
 }
 
@@ -551,29 +609,55 @@ static void free_pkt(struct rxe_pkt_info *pkt)
 	struct ib_device *dev = qp->ibqp.device;
 
 	kfree_skb(skb);
-	rxe_drop_ref(qp);
+	rxe_put(qp);
 	ib_device_put(dev);
 }
 
-int rxe_completer(void *arg)
+/* reset the retry timer if
+ *	- QP is type RC
+ *	- there is a packet sent by the requester that
+ *	  might be acked (we still might get spurious
+ *	  timeouts but try to keep them as few as possible)
+ *	- the timeout parameter is set
+ *	- the QP is alive
+ */
+static void reset_retry_timer(struct rxe_qp *qp)
+{
+	unsigned long flags;
+
+	if (qp_type(qp) == IB_QPT_RC && qp->qp_timeout_jiffies) {
+		spin_lock_irqsave(&qp->state_lock, flags);
+		if (qp_state(qp) >= IB_QPS_RTS &&
+		    psn_compare(qp->req.psn, qp->comp.psn) > 0)
+			mod_timer(&qp->retrans_timer,
+				  jiffies + qp->qp_timeout_jiffies);
+		spin_unlock_irqrestore(&qp->state_lock, flags);
+	}
+}
+
+int rxe_completer(struct rxe_qp *qp)
 {
-	struct rxe_qp *qp = (struct rxe_qp *)arg;
 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 	struct rxe_send_wqe *wqe = NULL;
 	struct sk_buff *skb = NULL;
 	struct rxe_pkt_info *pkt = NULL;
 	enum comp_state state;
-	int ret = 0;
+	int ret;
+	unsigned long flags;
+
+	qp->req.again = 0;
 
-	rxe_add_ref(qp);
+	spin_lock_irqsave(&qp->state_lock, flags);
+	if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
+	    qp_state(qp) == IB_QPS_RESET) {
+		bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);
 
-	if (!qp->valid || qp->req.state == QP_STATE_ERROR ||
-	    qp->req.state == QP_STATE_RESET) {
-		rxe_drain_resp_pkts(qp, qp->valid &&
-				    qp->req.state == QP_STATE_ERROR);
-		ret = -EAGAIN;
-		goto done;
+		drain_resp_pkts(qp);
+		flush_send_queue(qp, notify);
+		spin_unlock_irqrestore(&qp->state_lock, flags);
+		goto exit;
 	}
+	spin_unlock_irqrestore(&qp->state_lock, flags);
 
 	if (qp->comp.timeout) {
 		qp->comp.timeout_retry = 1;
@@ -582,16 +666,13 @@ int rxe_completer(void *arg)
 		qp->comp.timeout_retry = 0;
 	}
 
-	if (qp->req.need_retry) {
-		ret = -EAGAIN;
-		goto done;
-	}
+	if (qp->req.need_retry)
+		goto exit;
 
 	state = COMPST_GET_ACK;
 
 	while (1) {
-		pr_debug("qp#%d state = %s\n", qp_num(qp),
-			 comp_state_name[state]);
+		rxe_dbg_qp(qp, "state = %s\n", comp_state_name[state]);
 		switch (state) {
 		case COMPST_GET_ACK:
 			skb = skb_dequeue(&qp->resp_pkts);
@@ -649,7 +730,7 @@ int rxe_completer(void *arg)
 
 			if (qp->req.wait_psn) {
 				qp->req.wait_psn = 0;
-				rxe_run_task(&qp->req.task, 1);
+				qp->req.again = 1;
 			}
 
 			state = COMPST_DONE;
@@ -664,22 +745,8 @@ int rxe_completer(void *arg)
 				break;
 			}
 
-			/* re reset the timeout counter if
-			 * (1) QP is type RC
-			 * (2) the QP is alive
-			 * (3) there is a packet sent by the requester that
-			 *     might be acked (we still might get spurious
-			 *     timeouts but try to keep them as few as possible)
-			 * (4) the timeout parameter is set
-			 */
-			if ((qp_type(qp) == IB_QPT_RC) &&
-			    (qp->req.state == QP_STATE_READY) &&
-			    (psn_compare(qp->req.psn, qp->comp.psn) > 0) &&
-			    qp->qp_timeout_jiffies)
-				mod_timer(&qp->retrans_timer,
-					  jiffies + qp->qp_timeout_jiffies);
-			ret = -EAGAIN;
-			goto done;
+			reset_retry_timer(qp);
+			goto exit;
 
 		case COMPST_ERROR_RETRY:
 			/* we come here if the retry timer fired and we did
@@ -691,10 +758,8 @@ int rxe_completer(void *arg)
 			 */
 
 			/* there is nothing to retry in this case */
-			if (!wqe || (wqe->state == wqe_state_posted)) {
-				ret = -EAGAIN;
-				goto done;
-			}
+			if (!wqe || (wqe->state == wqe_state_posted))
+				goto exit;
 
 			/* if we've started a retry, don't start another
 			 * retry sequence, unless this is a timeout.
@@ -720,7 +785,7 @@ int rxe_completer(void *arg)
 						RXE_CNT_COMP_RETRY);
 				qp->req.need_retry = 1;
 				qp->comp.started_retry = 1;
-				rxe_run_task(&qp->req.task, 0);
+				qp->req.again = 1;
 			}
 			goto done;
 
@@ -732,18 +797,21 @@ int rxe_completer(void *arg)
 			break;
 
 		case COMPST_RNR_RETRY:
+			/* we come here if we received an RNR NAK */
 			if (qp->comp.rnr_retry > 0) {
 				if (qp->comp.rnr_retry != 7)
 					qp->comp.rnr_retry--;
 
-				qp->req.need_retry = 1;
-				pr_debug("qp#%d set rnr nak timer\n",
-					 qp_num(qp));
+				/* don't start a retry flow until the
+				 * rnr timer has fired
+				 */
+				qp->req.wait_for_rnr_timer = 1;
+				rxe_dbg_qp(qp, "set rnr nak timer\n");
+				// TODO who protects from destroy_qp??
 				mod_timer(&qp->rnr_nak_timer,
					  jiffies + rnrnak_jiffies(aeth_syn(pkt)
						& ~AETH_TYPE_MASK));
-				ret = -EAGAIN;
-				goto done;
+				goto exit;
 			} else {
 				rxe_counter_inc(rxe,
						RXE_CNT_RNR_RETRY_EXCEEDED);
@@ -756,15 +824,22 @@ int rxe_completer(void *arg)
 			WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS);
 			do_complete(qp, wqe);
 			rxe_qp_error(qp);
-			ret = -EAGAIN;
-			goto done;
+			goto exit;
 		}
 	}
 
+	/* A non-zero return value will cause rxe_do_task to
+	 * exit its loop and end the work item. A zero return
+	 * will continue looping and return to rxe_completer
+	 */
done:
+	ret = 0;
+	goto out;
+exit:
+	ret = (qp->req.again) ? 0 : -EAGAIN;
+out:
+	qp->req.again = 0;
 	if (pkt)
 		free_pkt(pkt);
-	rxe_drop_ref(qp);
-
 	return ret;
 }
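Note on the new done/exit/out block at the end of rxe_completer(): per the comment the patch adds, a zero return asks the task machinery to invoke the completer again, while a non-zero value (-EAGAIN) ends the work item. The snippet below is only a minimal user-space sketch of that return convention, not the kernel's rxe_do_task or task code; fake_completer() and budget are made-up names for illustration.

/* build with: cc -o loop loop.c */
#include <errno.h>
#include <stdio.h>

static int budget = 3;	/* pretend three responses are queued */

/* stand-in for rxe_completer(): 0 = call me again, -EAGAIN = no more work */
static int fake_completer(void)
{
	if (budget-- > 0) {
		printf("processed one response, ask to be run again\n");
		return 0;
	}
	return -EAGAIN;
}

int main(void)
{
	/* stand-in for the task loop: keep calling while the handler returns 0 */
	while (fake_completer() == 0)
		;
	printf("completer reported -EAGAIN; work item ends\n");
	return 0;
}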
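For context on the reworked drain path: comp_check_sq_drain_done() clears qp->attr.sq_draining and raises IB_EVENT_SQ_DRAINED once the completer PSN catches up with the requester PSN while the QP is in SQD. The sketch below shows roughly how a user-space consumer would observe that event with plain libibverbs; it is generic verbs usage, not code from this patch, and it assumes an already created, connected qp and its device ctx (link with -libverbs).

#include <stdio.h>
#include <infiniband/verbs.h>

/* move the QP to SQD and block until the provider reports the send
 * queue as drained via the async event channel */
static int drain_send_queue(struct ibv_context *ctx, struct ibv_qp *qp)
{
	struct ibv_qp_attr attr = { .qp_state = IBV_QPS_SQD };
	struct ibv_async_event ev;

	if (ibv_modify_qp(qp, &attr, IBV_QP_STATE))
		return -1;

	/* ibv_get_async_event() blocks; unrelated events are acked and skipped */
	do {
		if (ibv_get_async_event(ctx, &ev))
			return -1;
		ibv_ack_async_event(&ev);
	} while (ev.event_type != IBV_EVENT_SQ_DRAINED);

	printf("send queue drained\n");
	return 0;
}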
