diff options
Diffstat (limited to 'drivers/infiniband/sw/rxe/rxe_comp.c')
| -rw-r--r-- | drivers/infiniband/sw/rxe/rxe_comp.c | 412 |
1 files changed, 229 insertions, 183 deletions
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 4bc88708b355..a5b2b62f596b 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include <linux/skbuff.h> @@ -130,6 +103,9 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode) case IB_WR_RDMA_READ_WITH_INV: return IB_WC_RDMA_READ; case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV; case IB_WR_REG_MR: return IB_WC_REG_MR; + case IB_WR_BIND_MW: return IB_WC_BIND_MW; + case IB_WR_ATOMIC_WRITE: return IB_WC_ATOMIC_WRITE; + case IB_WR_FLUSH: return IB_WC_FLUSH; default: return 0xff; @@ -138,25 +114,24 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode) void retransmit_timer(struct timer_list *t) { - struct rxe_qp *qp = from_timer(qp, t, retrans_timer); + struct rxe_qp *qp = timer_container_of(qp, t, retrans_timer); + unsigned long flags; + + rxe_dbg_qp(qp, "retransmit timer fired\n"); + spin_lock_irqsave(&qp->state_lock, flags); if (qp->valid) { qp->comp.timeout = 1; - rxe_run_task(&qp->comp.task, 1); + rxe_sched_task(&qp->send_task); } + spin_unlock_irqrestore(&qp->state_lock, flags); } void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb) { - int must_sched; - + rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_SENDER_SCHED); skb_queue_tail(&qp->resp_pkts, skb); - - must_sched = skb_queue_len(&qp->resp_pkts) > 1; - if (must_sched != 0) - rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED); - - rxe_run_task(&qp->comp.task, must_sched); + rxe_sched_task(&qp->send_task); } static inline enum comp_state get_wqe(struct rxe_qp *qp, @@ -168,7 +143,7 @@ static inline enum comp_state get_wqe(struct rxe_qp *qp, /* we come here whether or not we found a response packet to see if * there are any posted WQEs */ - wqe = queue_head(qp->sq.queue); + wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT); *wqe_p = wqe; /* no WQE or requester has not started it yet */ @@ -224,6 +199,10 @@ static inline enum comp_state check_psn(struct rxe_qp *qp, */ if (pkt->psn == wqe->last_psn) return COMPST_COMP_ACK; + else if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE && + (qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST || + qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE)) + return COMPST_CHECK_ACK; else return COMPST_DONE; } else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) { @@ -252,6 +231,10 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST: case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: + /* Check NAK code to handle a remote error */ + if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE) + break; + if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE && pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) { /* read retries of partial data may restart from @@ -282,12 +265,16 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, if ((syn & AETH_TYPE_MASK) != AETH_ACK) return COMPST_ERROR; - /* fall through */ + if (wqe->wr.opcode == IB_WR_ATOMIC_WRITE) + return COMPST_WRITE_SEND; + + fallthrough; /* (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE doesn't have an AETH) */ case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: if (wqe->wr.opcode != IB_WR_RDMA_READ && - wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) { + wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV && + wqe->wr.opcode != IB_WR_FLUSH) { wqe->status = IB_WC_FATAL_ERR; return COMPST_ERROR; } @@ -329,7 +316,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, qp->comp.psn = pkt->psn; if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_run_task(&qp->req.task, 0); + qp->req.again = 1; } } return COMPST_ERROR_RETRY; @@ -347,7 +334,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, return COMPST_ERROR; default: - pr_warn("unexpected nak %x\n", syn); + rxe_dbg_qp(qp, "unexpected nak %x\n", syn); wqe->status = IB_WC_REM_OP_ERR; return COMPST_ERROR; } @@ -358,7 +345,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, break; default: - pr_warn("unexpected opcode\n"); + rxe_dbg_qp(qp, "unexpected opcode\n"); } return COMPST_ERROR; @@ -372,14 +359,16 @@ static inline enum comp_state do_read(struct rxe_qp *qp, ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &wqe->dma, payload_addr(pkt), - payload_size(pkt), to_mem_obj, NULL); - if (ret) + payload_size(pkt), RXE_TO_MR_OBJ); + if (ret) { + wqe->status = IB_WC_LOC_PROT_ERR; return COMPST_ERROR; + } if (wqe->dma.resid == 0 && (pkt->mask & RXE_END_MASK)) return COMPST_COMP_ACK; - else - return COMPST_UPDATE_COMP; + + return COMPST_UPDATE_COMP; } static inline enum comp_state do_atomic(struct rxe_qp *qp, @@ -392,40 +381,51 @@ static inline enum comp_state do_atomic(struct rxe_qp *qp, ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &wqe->dma, &atomic_orig, - sizeof(u64), to_mem_obj, NULL); - if (ret) + sizeof(u64), RXE_TO_MR_OBJ); + if (ret) { + wqe->status = IB_WC_LOC_PROT_ERR; return COMPST_ERROR; - else - return COMPST_COMP_ACK; + } + + return COMPST_COMP_ACK; } static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe, struct rxe_cqe *cqe) { + struct ib_wc *wc = &cqe->ibwc; + struct ib_uverbs_wc *uwc = &cqe->uibwc; + memset(cqe, 0, sizeof(*cqe)); if (!qp->is_user) { - struct ib_wc *wc = &cqe->ibwc; - - wc->wr_id = wqe->wr.wr_id; - wc->status = wqe->status; - wc->opcode = wr_to_wc_opcode(wqe->wr.opcode); - if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || - wqe->wr.opcode == IB_WR_SEND_WITH_IMM) - wc->wc_flags = IB_WC_WITH_IMM; - wc->byte_len = wqe->dma.length; - wc->qp = &qp->ibqp; + wc->wr_id = wqe->wr.wr_id; + wc->status = wqe->status; + wc->qp = &qp->ibqp; } else { - struct ib_uverbs_wc *uwc = &cqe->uibwc; - - uwc->wr_id = wqe->wr.wr_id; - uwc->status = wqe->status; - uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode); - if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || - wqe->wr.opcode == IB_WR_SEND_WITH_IMM) - uwc->wc_flags = IB_WC_WITH_IMM; - uwc->byte_len = wqe->dma.length; - uwc->qp_num = qp->ibqp.qp_num; + uwc->wr_id = wqe->wr.wr_id; + uwc->status = wqe->status; + uwc->qp_num = qp->ibqp.qp_num; + } + + if (wqe->status == IB_WC_SUCCESS) { + if (!qp->is_user) { + wc->opcode = wr_to_wc_opcode(wqe->wr.opcode); + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + wc->wc_flags = IB_WC_WITH_IMM; + wc->byte_len = wqe->dma.length; + } else { + uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode); + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + uwc->wc_flags = IB_WC_WITH_IMM; + uwc->byte_len = wqe->dma.length; + } + } else { + if (wqe->status != IB_WC_WR_FLUSH_ERR) + rxe_err_qp(qp, "non-flush error status = %d\n", + wqe->status); } } @@ -441,16 +441,20 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_cqe cqe; + bool post; + + /* do we need to post a completion */ + post = ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) || + (wqe->wr.send_flags & IB_SEND_SIGNALED) || + wqe->status != IB_WC_SUCCESS); - if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED) || - wqe->status != IB_WC_SUCCESS) { + if (post) make_send_cqe(qp, wqe, &cqe); - advance_consumer(qp->sq.queue); + + queue_advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT); + + if (post) rxe_cq_post(qp->scq, &cqe, 0); - } else { - advance_consumer(qp->sq.queue); - } if (wqe->wr.opcode == IB_WR_SEND || wqe->wr.opcode == IB_WR_SEND_WITH_IMM || @@ -463,32 +467,18 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) */ if (qp->req.wait_fence) { qp->req.wait_fence = 0; - rxe_run_task(&qp->req.task, 0); + qp->req.again = 1; } } -static inline enum comp_state complete_ack(struct rxe_qp *qp, - struct rxe_pkt_info *pkt, - struct rxe_send_wqe *wqe) +static void comp_check_sq_drain_done(struct rxe_qp *qp) { unsigned long flags; - if (wqe->has_rd_atomic) { - wqe->has_rd_atomic = 0; - atomic_inc(&qp->req.rd_atomic); - if (qp->req.need_rd_atomic) { - qp->comp.timeout_retry = 0; - qp->req.need_rd_atomic = 0; - rxe_run_task(&qp->req.task, 0); - } - } - - if (unlikely(qp->req.state == QP_STATE_DRAIN)) { - /* state_lock used by requester & completer */ - spin_lock_irqsave(&qp->state_lock, flags); - if ((qp->req.state == QP_STATE_DRAIN) && - (qp->comp.psn == qp->req.psn)) { - qp->req.state = QP_STATE_DRAINED; + spin_lock_irqsave(&qp->state_lock, flags); + if (unlikely(qp_state(qp) == IB_QPS_SQD)) { + if (qp->attr.sq_draining && qp->comp.psn == qp->req.psn) { + qp->attr.sq_draining = 0; spin_unlock_irqrestore(&qp->state_lock, flags); if (qp->ibqp.event_handler) { @@ -500,11 +490,28 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp, qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); } - } else { - spin_unlock_irqrestore(&qp->state_lock, flags); + return; + } + } + spin_unlock_irqrestore(&qp->state_lock, flags); +} + +static inline enum comp_state complete_ack(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + if (wqe->has_rd_atomic) { + wqe->has_rd_atomic = 0; + atomic_inc(&qp->req.rd_atomic); + if (qp->req.need_rd_atomic) { + qp->comp.timeout_retry = 0; + qp->req.need_rd_atomic = 0; + qp->req.again = 1; } } + comp_check_sq_drain_done(qp); + do_complete(qp, wqe); if (psn_compare(pkt->psn, qp->comp.psn) >= 0) @@ -525,7 +532,7 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp, if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_run_task(&qp->req.task, 1); + qp->req.again = 1; } } @@ -534,43 +541,123 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp, return COMPST_GET_WQE; } -static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify) +/* drain incoming response packet queue */ +static void drain_resp_pkts(struct rxe_qp *qp) { struct sk_buff *skb; - struct rxe_send_wqe *wqe; while ((skb = skb_dequeue(&qp->resp_pkts))) { - rxe_drop_ref(qp); + rxe_put(qp); kfree_skb(skb); + ib_device_put(qp->ibqp.device); + } +} + +/* complete send wqe with flush error */ +static int flush_send_wqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + struct rxe_cqe cqe = {}; + struct ib_wc *wc = &cqe.ibwc; + struct ib_uverbs_wc *uwc = &cqe.uibwc; + int err; + + if (qp->is_user) { + uwc->wr_id = wqe->wr.wr_id; + uwc->status = IB_WC_WR_FLUSH_ERR; + uwc->qp_num = qp->ibqp.qp_num; + } else { + wc->wr_id = wqe->wr.wr_id; + wc->status = IB_WC_WR_FLUSH_ERR; + wc->qp = &qp->ibqp; } - while ((wqe = queue_head(qp->sq.queue))) { + err = rxe_cq_post(qp->scq, &cqe, 0); + if (err) + rxe_dbg_cq(qp->scq, "post cq failed, err = %d\n", err); + + return err; +} + +/* drain and optionally complete the send queue + * if unable to complete a wqe, i.e. cq is full, stop + * completing and flush the remaining wqes + */ +static void flush_send_queue(struct rxe_qp *qp, bool notify) +{ + struct rxe_send_wqe *wqe; + struct rxe_queue *q = qp->sq.queue; + int err; + + /* send queue never got created. nothing to do. */ + if (!qp->sq.queue) + return; + + while ((wqe = queue_head(q, q->type))) { if (notify) { - wqe->status = IB_WC_WR_FLUSH_ERR; - do_complete(qp, wqe); - } else { - advance_consumer(qp->sq.queue); + err = flush_send_wqe(qp, wqe); + if (err) + notify = 0; } + queue_advance_consumer(q, q->type); } } -int rxe_completer(void *arg) +static void free_pkt(struct rxe_pkt_info *pkt) +{ + struct sk_buff *skb = PKT_TO_SKB(pkt); + struct rxe_qp *qp = pkt->qp; + struct ib_device *dev = qp->ibqp.device; + + kfree_skb(skb); + rxe_put(qp); + ib_device_put(dev); +} + +/* reset the retry timer if + * - QP is type RC + * - there is a packet sent by the requester that + * might be acked (we still might get spurious + * timeouts but try to keep them as few as possible) + * - the timeout parameter is set + * - the QP is alive + */ +static void reset_retry_timer(struct rxe_qp *qp) +{ + unsigned long flags; + + if (qp_type(qp) == IB_QPT_RC && qp->qp_timeout_jiffies) { + spin_lock_irqsave(&qp->state_lock, flags); + if (qp_state(qp) >= IB_QPS_RTS && + psn_compare(qp->req.psn, qp->comp.psn) > 0) + mod_timer(&qp->retrans_timer, + jiffies + qp->qp_timeout_jiffies); + spin_unlock_irqrestore(&qp->state_lock, flags); + } +} + +int rxe_completer(struct rxe_qp *qp) { - struct rxe_qp *qp = (struct rxe_qp *)arg; struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_send_wqe *wqe = NULL; struct sk_buff *skb = NULL; struct rxe_pkt_info *pkt = NULL; enum comp_state state; + int ret; + unsigned long flags; + + qp->req.again = 0; - rxe_add_ref(qp); + spin_lock_irqsave(&qp->state_lock, flags); + if (!qp->valid || qp_state(qp) == IB_QPS_ERR || + qp_state(qp) == IB_QPS_RESET) { + bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR); - if (!qp->valid || qp->req.state == QP_STATE_ERROR || - qp->req.state == QP_STATE_RESET) { - rxe_drain_resp_pkts(qp, qp->valid && - qp->req.state == QP_STATE_ERROR); + drain_resp_pkts(qp); + flush_send_queue(qp, notify); + spin_unlock_irqrestore(&qp->state_lock, flags); goto exit; } + spin_unlock_irqrestore(&qp->state_lock, flags); if (qp->comp.timeout) { qp->comp.timeout_retry = 1; @@ -585,8 +672,7 @@ int rxe_completer(void *arg) state = COMPST_GET_ACK; while (1) { - pr_debug("qp#%d state = %s\n", qp_num(qp), - comp_state_name[state]); + rxe_dbg_qp(qp, "state = %s\n", comp_state_name[state]); switch (state) { case COMPST_GET_ACK: skb = skb_dequeue(&qp->resp_pkts); @@ -644,18 +730,13 @@ int rxe_completer(void *arg) if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_run_task(&qp->req.task, 1); + qp->req.again = 1; } state = COMPST_DONE; break; case COMPST_DONE: - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; - } goto done; case COMPST_EXIT: @@ -664,20 +745,7 @@ int rxe_completer(void *arg) break; } - /* re reset the timeout counter if - * (1) QP is type RC - * (2) the QP is alive - * (3) there is a packet sent by the requester that - * might be acked (we still might get spurious - * timeouts but try to keep them as few as possible) - * (4) the timeout parameter is set - */ - if ((qp_type(qp) == IB_QPT_RC) && - (qp->req.state == QP_STATE_READY) && - (psn_compare(qp->req.psn, qp->comp.psn) > 0) && - qp->qp_timeout_jiffies) - mod_timer(&qp->retrans_timer, - jiffies + qp->qp_timeout_jiffies); + reset_retry_timer(qp); goto exit; case COMPST_ERROR_RETRY: @@ -690,23 +758,15 @@ int rxe_completer(void *arg) */ /* there is nothing to retry in this case */ - if (!wqe || (wqe->state == wqe_state_posted)) { + if (!wqe || (wqe->state == wqe_state_posted)) goto exit; - } /* if we've started a retry, don't start another * retry sequence, unless this is a timeout. */ if (qp->comp.started_retry && - !qp->comp.timeout_retry) { - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; - } - + !qp->comp.timeout_retry) goto done; - } if (qp->comp.retry_cnt > 0) { if (qp->comp.retry_cnt != 7) @@ -725,15 +785,8 @@ int rxe_completer(void *arg) RXE_CNT_COMP_RETRY); qp->req.need_retry = 1; qp->comp.started_retry = 1; - rxe_run_task(&qp->req.task, 0); - } - - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; + qp->req.again = 1; } - goto done; } else { @@ -744,19 +797,20 @@ int rxe_completer(void *arg) break; case COMPST_RNR_RETRY: + /* we come here if we received an RNR NAK */ if (qp->comp.rnr_retry > 0) { if (qp->comp.rnr_retry != 7) qp->comp.rnr_retry--; - qp->req.need_retry = 1; - pr_debug("qp#%d set rnr nak timer\n", - qp_num(qp)); + /* don't start a retry flow until the + * rnr timer has fired + */ + qp->req.wait_for_rnr_timer = 1; + rxe_dbg_qp(qp, "set rnr nak timer\n"); + // TODO who protects from destroy_qp?? mod_timer(&qp->rnr_nak_timer, jiffies + rnrnak_jiffies(aeth_syn(pkt) & ~AETH_TYPE_MASK)); - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; goto exit; } else { rxe_counter_inc(rxe, @@ -770,30 +824,22 @@ int rxe_completer(void *arg) WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS); do_complete(qp, wqe); rxe_qp_error(qp); - - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; - } - goto exit; } } -exit: - /* we come here if we are done with processing and want the task to - * exit from the loop calling us + /* A non-zero return value will cause rxe_do_task to + * exit its loop and end the work item. A zero return + * will continue looping and return to rxe_completer */ - WARN_ON_ONCE(skb); - rxe_drop_ref(qp); - return -EAGAIN; - done: - /* we come here if we have processed a packet we want the task to call - * us again to see if there is anything else to do - */ - WARN_ON_ONCE(skb); - rxe_drop_ref(qp); - return 0; + ret = 0; + goto out; +exit: + ret = (qp->req.again) ? 0 : -EAGAIN; +out: + qp->req.again = 0; + if (pkt) + free_pkt(pkt); + return ret; } |
