Diffstat (limited to 'drivers/infiniband/sw/rxe/rxe_resp.c')
| -rw-r--r-- | drivers/infiniband/sw/rxe/rxe_resp.c | 978 |
1 file changed, 627 insertions, 351 deletions
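At a glance, the accumulated changes in the diff below extend the responder state machine with three new reply states (RESPST_ATOMIC_REPLY, RESPST_ATOMIC_WRITE_REPLY and RESPST_PROCESS_FLUSH), drop RESPST_RESET, and move the enum definition out of this file. The following is a minimal, standalone sketch of how those states slot into the execute()/rxe_receiver() dispatch; it is an orientation aid only, with trimmed names and stand-in mask bits, not the kernel code itself (which dispatches on pkt->mask inside execute() and on enum resp_states inside rxe_receiver()).

/*
 * Simplified model of the responder dispatch after this series:
 * reads, atomics, atomic writes and flushes each get their own
 * reply state instead of being handled inline in execute().
 * All identifiers here are illustrative stand-ins.
 */
#include <stdio.h>

enum resp_states {
	RESPST_EXECUTE,
	RESPST_READ_REPLY,
	RESPST_ATOMIC_REPLY,		/* new: RDMA atomic ops */
	RESPST_ATOMIC_WRITE_REPLY,	/* new: RDMA atomic write */
	RESPST_PROCESS_FLUSH,		/* new: RDMA flush */
	RESPST_ACKNOWLEDGE,
	RESPST_CLEANUP,
};

/* trimmed stand-ins for the RXE_*_MASK opcode classification bits */
enum { PKT_READ = 1, PKT_ATOMIC = 2, PKT_ATOMIC_WRITE = 4, PKT_FLUSH = 8 };

/* model of execute(): classify the request and pick the reply state */
static enum resp_states execute(unsigned int pkt_mask)
{
	if (pkt_mask & PKT_READ)
		return RESPST_READ_REPLY;
	if (pkt_mask & PKT_ATOMIC)
		return RESPST_ATOMIC_REPLY;
	if (pkt_mask & PKT_ATOMIC_WRITE)
		return RESPST_ATOMIC_WRITE_REPLY;
	if (pkt_mask & PKT_FLUSH)
		return RESPST_PROCESS_FLUSH;
	/* simplified: the real code copies send/write payloads first */
	return RESPST_ACKNOWLEDGE;
}

int main(void)
{
	static const char *name[] = {
		"EXECUTE", "READ_REPLY", "ATOMIC_REPLY",
		"ATOMIC_WRITE_REPLY", "PROCESS_FLUSH",
		"ACKNOWLEDGE", "CLEANUP",
	};
	unsigned int masks[] = { PKT_READ, PKT_ATOMIC, PKT_ATOMIC_WRITE, PKT_FLUSH };

	for (unsigned int i = 0; i < sizeof(masks) / sizeof(masks[0]); i++)
		printf("mask %u -> %s\n", masks[i], name[execute(masks[i])]);
	return 0;
}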
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 3743dc39b60c..711f73e0bbb1 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -10,40 +10,6 @@ #include "rxe_loc.h" #include "rxe_queue.h" -enum resp_states { - RESPST_NONE, - RESPST_GET_REQ, - RESPST_CHK_PSN, - RESPST_CHK_OP_SEQ, - RESPST_CHK_OP_VALID, - RESPST_CHK_RESOURCE, - RESPST_CHK_LENGTH, - RESPST_CHK_RKEY, - RESPST_EXECUTE, - RESPST_READ_REPLY, - RESPST_COMPLETE, - RESPST_ACKNOWLEDGE, - RESPST_CLEANUP, - RESPST_DUPLICATE_REQUEST, - RESPST_ERR_MALFORMED_WQE, - RESPST_ERR_UNSUPPORTED_OPCODE, - RESPST_ERR_MISALIGNED_ATOMIC, - RESPST_ERR_PSN_OUT_OF_SEQ, - RESPST_ERR_MISSING_OPCODE_FIRST, - RESPST_ERR_MISSING_OPCODE_LAST_C, - RESPST_ERR_MISSING_OPCODE_LAST_D1E, - RESPST_ERR_TOO_MANY_RDMA_ATM_REQ, - RESPST_ERR_RNR, - RESPST_ERR_RKEY_VIOLATION, - RESPST_ERR_INVALIDATE_RKEY, - RESPST_ERR_LENGTH, - RESPST_ERR_CQ_OVERFLOW, - RESPST_ERROR, - RESPST_RESET, - RESPST_DONE, - RESPST_EXIT, -}; - static char *resp_state_name[] = { [RESPST_NONE] = "NONE", [RESPST_GET_REQ] = "GET_REQ", @@ -55,6 +21,9 @@ static char *resp_state_name[] = { [RESPST_CHK_RKEY] = "CHK_RKEY", [RESPST_EXECUTE] = "EXECUTE", [RESPST_READ_REPLY] = "READ_REPLY", + [RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY", + [RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY", + [RESPST_PROCESS_FLUSH] = "PROCESS_FLUSH", [RESPST_COMPLETE] = "COMPLETE", [RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE", [RESPST_CLEANUP] = "CLEANUP", @@ -73,7 +42,6 @@ static char *resp_state_name[] = { [RESPST_ERR_LENGTH] = "ERR_LENGTH", [RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW", [RESPST_ERROR] = "ERROR", - [RESPST_RESET] = "RESET", [RESPST_DONE] = "DONE", [RESPST_EXIT] = "EXIT", }; @@ -81,15 +49,8 @@ static char *resp_state_name[] = { /* rxe_recv calls here to add a request packet to the input queue */ void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb) { - int must_sched; - struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); - skb_queue_tail(&qp->req_pkts, skb); - - must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) || - (skb_queue_len(&qp->req_pkts) > 1); - - rxe_run_task(&qp->resp.task, must_sched); + rxe_sched_task(&qp->recv_task); } static inline enum resp_states get_req(struct rxe_qp *qp, @@ -97,17 +58,6 @@ static inline enum resp_states get_req(struct rxe_qp *qp, { struct sk_buff *skb; - if (qp->resp.state == QP_STATE_ERROR) { - while ((skb = skb_dequeue(&qp->req_pkts))) { - rxe_drop_ref(qp); - kfree_skb(skb); - ib_device_put(qp->ibqp.device); - } - - /* go drain recv wr queue */ - return RESPST_CHK_RESOURCE; - } - skb = skb_peek(&qp->req_pkts); if (!skb) return RESPST_EXIT; @@ -251,19 +201,37 @@ static enum resp_states check_op_seq(struct rxe_qp *qp, } } +static bool check_qp_attr_access(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + if (((pkt->mask & RXE_READ_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) || + ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) || + ((pkt->mask & RXE_ATOMIC_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) + return false; + + if (pkt->mask & RXE_FLUSH_MASK) { + u32 flush_type = feth_plt(pkt); + + if ((flush_type & IB_FLUSH_GLOBAL && + !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) || + (flush_type & IB_FLUSH_PERSISTENT && + !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT))) + return false; + } + + return true; +} + static enum resp_states check_op_valid(struct rxe_qp 
*qp, struct rxe_pkt_info *pkt) { switch (qp_type(qp)) { case IB_QPT_RC: - if (((pkt->mask & RXE_READ_MASK) && - !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) || - ((pkt->mask & RXE_WRITE_MASK) && - !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) || - ((pkt->mask & RXE_ATOMIC_MASK) && - !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) { + if (!check_qp_attr_access(qp, pkt)) return RESPST_ERR_UNSUPPORTED_OPCODE; - } break; @@ -277,7 +245,6 @@ static enum resp_states check_op_valid(struct rxe_qp *qp, break; case IB_QPT_UD: - case IB_QPT_SMI: case IB_QPT_GSI: break; @@ -297,49 +264,42 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp) struct ib_event ev; unsigned int count; size_t size; + unsigned long flags; if (srq->error) return RESPST_ERR_RNR; - spin_lock_bh(&srq->rq.consumer_lock); + spin_lock_irqsave(&srq->rq.consumer_lock, flags); - if (qp->is_user) - wqe = queue_head(q, QUEUE_TYPE_FROM_USER); - else - wqe = queue_head(q, QUEUE_TYPE_KERNEL); + wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT); if (!wqe) { - spin_unlock_bh(&srq->rq.consumer_lock); + spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); return RESPST_ERR_RNR; } /* don't trust user space data */ if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) { - spin_unlock_bh(&srq->rq.consumer_lock); - pr_warn("%s: invalid num_sge in SRQ entry\n", __func__); + spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); + rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n"); return RESPST_ERR_MALFORMED_WQE; } - size = sizeof(wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge); + size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge); memcpy(&qp->resp.srq_wqe, wqe, size); qp->resp.wqe = &qp->resp.srq_wqe.wqe; - if (qp->is_user) { - advance_consumer(q, QUEUE_TYPE_FROM_USER); - count = queue_count(q, QUEUE_TYPE_FROM_USER); - } else { - advance_consumer(q, QUEUE_TYPE_KERNEL); - count = queue_count(q, QUEUE_TYPE_KERNEL); - } + queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT); + count = queue_count(q, QUEUE_TYPE_FROM_CLIENT); if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) { srq->limit = 0; goto event; } - spin_unlock_bh(&srq->rq.consumer_lock); + spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); return RESPST_CHK_LENGTH; event: - spin_unlock_bh(&srq->rq.consumer_lock); + spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); ev.device = qp->ibqp.device; ev.element.srq = qp->ibqp.srq; ev.event = IB_EVENT_SRQ_LIMIT_REACHED; @@ -352,29 +312,7 @@ static enum resp_states check_resource(struct rxe_qp *qp, { struct rxe_srq *srq = qp->srq; - if (qp->resp.state == QP_STATE_ERROR) { - if (qp->resp.wqe) { - qp->resp.status = IB_WC_WR_FLUSH_ERR; - return RESPST_COMPLETE; - } else if (!srq) { - if (qp->is_user) - qp->resp.wqe = queue_head(qp->rq.queue, - QUEUE_TYPE_FROM_USER); - else - qp->resp.wqe = queue_head(qp->rq.queue, - QUEUE_TYPE_KERNEL); - if (qp->resp.wqe) { - qp->resp.status = IB_WC_WR_FLUSH_ERR; - return RESPST_COMPLETE; - } else { - return RESPST_EXIT; - } - } else { - return RESPST_EXIT; - } - } - - if (pkt->mask & RXE_READ_OR_ATOMIC) { + if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) { /* it is the requesters job to not send * too many read/atomic ops, we just * recycle the responder resource queue @@ -389,33 +327,106 @@ static enum resp_states check_resource(struct rxe_qp *qp, if (srq) return get_srq_wqe(qp); - if (qp->is_user) - qp->resp.wqe = queue_head(qp->rq.queue, - QUEUE_TYPE_FROM_USER); - else - qp->resp.wqe = queue_head(qp->rq.queue, - QUEUE_TYPE_KERNEL); + qp->resp.wqe 
= queue_head(qp->rq.queue, + QUEUE_TYPE_FROM_CLIENT); return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR; } return RESPST_CHK_LENGTH; } -static enum resp_states check_length(struct rxe_qp *qp, - struct rxe_pkt_info *pkt) +static enum resp_states rxe_resp_check_length(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) { - switch (qp_type(qp)) { - case IB_QPT_RC: - return RESPST_CHK_RKEY; + /* + * See IBA C9-92 + * For UD QPs we only check if the packet will fit in the + * receive buffer later. For RDMA operations additional + * length checks are performed in check_rkey. + */ + if ((qp_type(qp) == IB_QPT_GSI) || (qp_type(qp) == IB_QPT_UD)) { + unsigned int payload = payload_size(pkt); + unsigned int recv_buffer_len = 0; + int i; + + for (i = 0; i < qp->resp.wqe->dma.num_sge; i++) + recv_buffer_len += qp->resp.wqe->dma.sge[i].length; + if (payload + sizeof(union rdma_network_hdr) > recv_buffer_len) { + rxe_dbg_qp(qp, "The receive buffer is too small for this UD packet.\n"); + return RESPST_ERR_LENGTH; + } + } - case IB_QPT_UC: - return RESPST_CHK_RKEY; + if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) || + (qp_type(qp) == IB_QPT_UC))) { + unsigned int mtu = qp->mtu; + unsigned int payload = payload_size(pkt); - default: - return RESPST_CHK_RKEY; + if ((pkt->mask & RXE_START_MASK) && + (pkt->mask & RXE_END_MASK)) { + if (unlikely(payload > mtu)) { + rxe_dbg_qp(qp, "only packet too long\n"); + return RESPST_ERR_LENGTH; + } + } else if ((pkt->mask & RXE_START_MASK) || + (pkt->mask & RXE_MIDDLE_MASK)) { + if (unlikely(payload != mtu)) { + rxe_dbg_qp(qp, "first or middle packet not mtu\n"); + return RESPST_ERR_LENGTH; + } + } else if (pkt->mask & RXE_END_MASK) { + if (unlikely((payload == 0) || (payload > mtu))) { + rxe_dbg_qp(qp, "last packet zero or too long\n"); + return RESPST_ERR_LENGTH; + } + } + } + + /* See IBA C9-94 */ + if (pkt->mask & RXE_RETH_MASK) { + if (reth_len(pkt) > (1U << 31)) { + rxe_dbg_qp(qp, "dma length too long\n"); + return RESPST_ERR_LENGTH; + } } + + if (pkt->mask & RXE_RDMA_OP_MASK) + return RESPST_CHK_RKEY; + else + return RESPST_EXECUTE; +} + +/* if the reth length field is zero we can assume nothing + * about the rkey value and should not validate or use it. + * Instead set qp->resp.rkey to 0 which is an invalid rkey + * value since the minimum index part is 1. + */ +static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt) +{ + unsigned int length = reth_len(pkt); + + qp->resp.va = reth_va(pkt); + qp->resp.offset = 0; + qp->resp.resid = length; + qp->resp.length = length; + if (pkt->mask & RXE_READ_OR_WRITE_MASK && length == 0) + qp->resp.rkey = 0; + else + qp->resp.rkey = reth_rkey(pkt); } +static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt) +{ + qp->resp.va = atmeth_va(pkt); + qp->resp.offset = 0; + qp->resp.rkey = atmeth_rkey(pkt); + qp->resp.resid = sizeof(u64); +} + +/* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL + * if an invalid rkey is received or the rdma length is zero. For middle + * or last packets use the stored value of mr. 
+ */ static enum resp_states check_rkey(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { @@ -427,32 +438,42 @@ static enum resp_states check_rkey(struct rxe_qp *qp, u32 pktlen; int mtu = qp->mtu; enum resp_states state; - int access; - - if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) { - if (pkt->mask & RXE_RETH_MASK) { - qp->resp.va = reth_va(pkt); - qp->resp.offset = 0; - qp->resp.rkey = reth_rkey(pkt); - qp->resp.resid = reth_len(pkt); - qp->resp.length = reth_len(pkt); - } + int access = 0; + + /* parse RETH or ATMETH header for first/only packets + * for va, length, rkey, etc. or use current value for + * middle/last packets. + */ + if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) { + if (pkt->mask & RXE_RETH_MASK) + qp_resp_from_reth(qp, pkt); + access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ : IB_ACCESS_REMOTE_WRITE; + } else if (pkt->mask & RXE_FLUSH_MASK) { + u32 flush_type = feth_plt(pkt); + + if (pkt->mask & RXE_RETH_MASK) + qp_resp_from_reth(qp, pkt); + + if (flush_type & IB_FLUSH_GLOBAL) + access |= IB_ACCESS_FLUSH_GLOBAL; + if (flush_type & IB_FLUSH_PERSISTENT) + access |= IB_ACCESS_FLUSH_PERSISTENT; } else if (pkt->mask & RXE_ATOMIC_MASK) { - qp->resp.va = atmeth_va(pkt); - qp->resp.offset = 0; - qp->resp.rkey = atmeth_rkey(pkt); - qp->resp.resid = sizeof(u64); + qp_resp_from_atmeth(qp, pkt); access = IB_ACCESS_REMOTE_ATOMIC; } else { - return RESPST_EXECUTE; + /* shouldn't happen */ + WARN_ON(1); } - /* A zero-byte op is not required to set an addr or rkey. */ - if ((pkt->mask & (RXE_READ_MASK | RXE_WRITE_OR_SEND)) && - (pkt->mask & RXE_RETH_MASK) && - reth_len(pkt) == 0) { + /* A zero-byte read or write op is not required to + * set an addr or rkey. See C9-88 + */ + if ((pkt->mask & RXE_READ_OR_WRITE_MASK) && + (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) { + qp->resp.mr = NULL; return RESPST_EXECUTE; } @@ -464,14 +485,14 @@ static enum resp_states check_rkey(struct rxe_qp *qp, if (rkey_is_mw(rkey)) { mw = rxe_lookup_mw(qp, access, rkey); if (!mw) { - pr_err("%s: no MW matches rkey %#x\n", __func__, rkey); + rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey); state = RESPST_ERR_RKEY_VIOLATION; goto err; } mr = mw->mr; if (!mr) { - pr_err("%s: MW doesn't have an MR\n", __func__); + rxe_dbg_qp(qp, "MW doesn't have an MR\n"); state = RESPST_ERR_RKEY_VIOLATION; goto err; } @@ -479,23 +500,33 @@ static enum resp_states check_rkey(struct rxe_qp *qp, if (mw->access & IB_ZERO_BASED) qp->resp.offset = mw->addr; - rxe_drop_ref(mw); - rxe_add_ref(mr); + rxe_get(mr); + rxe_put(mw); + mw = NULL; } else { mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE); if (!mr) { - pr_err("%s: no MR matches rkey %#x\n", __func__, rkey); + rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey); state = RESPST_ERR_RKEY_VIOLATION; goto err; } } + if (pkt->mask & RXE_FLUSH_MASK) { + /* FLUSH MR may not set va or resid + * no need to check range since we will flush whole mr + */ + if (feth_sel(pkt) == IB_FLUSH_MR) + goto skip_check_range; + } + if (mr_check_range(mr, va + qp->resp.offset, resid)) { state = RESPST_ERR_RKEY_VIOLATION; goto err; } - if (pkt->mask & RXE_WRITE_MASK) { +skip_check_range: + if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) { if (resid > mtu) { if (pktlen != mtu || bth_pad(pkt)) { state = RESPST_ERR_LENGTH; @@ -522,10 +553,11 @@ static enum resp_states check_rkey(struct rxe_qp *qp, return RESPST_EXECUTE; err: + qp->resp.mr = NULL; if (mr) - rxe_drop_ref(mr); + rxe_put(mr); if (mw) - rxe_drop_ref(mw); + rxe_put(mw); return state; } @@ 
-536,7 +568,7 @@ static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr, int err; err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma, - data_addr, data_len, RXE_TO_MR_OBJ, NULL); + data_addr, data_len, RXE_TO_MR_OBJ); if (unlikely(err)) return (err == -ENOSPC) ? RESPST_ERR_LENGTH : RESPST_ERR_MALFORMED_WQE; @@ -552,7 +584,7 @@ static enum resp_states write_data_in(struct rxe_qp *qp, int data_len = payload_size(pkt); err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset, - payload_addr(pkt), data_len, RXE_TO_MR_OBJ, NULL); + payload_addr(pkt), data_len, RXE_TO_MR_OBJ); if (err) { rc = RESPST_ERR_RKEY_VIOLATION; goto out; @@ -565,61 +597,193 @@ out: return rc; } -/* Guarantee atomicity of atomic operations at the machine level. */ -static DEFINE_SPINLOCK(atomic_ops_lock); +static struct resp_res *rxe_prepare_res(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + int type) +{ + struct resp_res *res; + u32 pkts; -static enum resp_states process_atomic(struct rxe_qp *qp, + res = &qp->resp.resources[qp->resp.res_head]; + rxe_advance_resp_resource(qp); + free_rd_atomic_resource(res); + + res->type = type; + res->replay = 0; + + switch (type) { + case RXE_READ_MASK: + res->read.va = qp->resp.va + qp->resp.offset; + res->read.va_org = qp->resp.va + qp->resp.offset; + res->read.resid = qp->resp.resid; + res->read.length = qp->resp.resid; + res->read.rkey = qp->resp.rkey; + + pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1); + res->first_psn = pkt->psn; + res->cur_psn = pkt->psn; + res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK; + + res->state = rdatm_res_state_new; + break; + case RXE_ATOMIC_MASK: + case RXE_ATOMIC_WRITE_MASK: + res->first_psn = pkt->psn; + res->last_psn = pkt->psn; + res->cur_psn = pkt->psn; + break; + case RXE_FLUSH_MASK: + res->flush.va = qp->resp.va + qp->resp.offset; + res->flush.length = qp->resp.length; + res->flush.type = feth_plt(pkt); + res->flush.level = feth_sel(pkt); + } + + return res; +} + +static enum resp_states process_flush(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { - u64 *vaddr; - enum resp_states ret; + u64 length, start; struct rxe_mr *mr = qp->resp.mr; + struct resp_res *res = qp->resp.res; - if (mr->state != RXE_MR_STATE_VALID) { - ret = RESPST_ERR_RKEY_VIOLATION; - goto out; + /* oA19-14, oA19-15 */ + if (res && res->replay) + return RESPST_ACKNOWLEDGE; + else if (!res) { + res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK); + qp->resp.res = res; } - vaddr = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, sizeof(u64)); + if (res->flush.level == IB_FLUSH_RANGE) { + start = res->flush.va; + length = res->flush.length; + } else { /* level == IB_FLUSH_MR */ + start = mr->ibmr.iova; + length = mr->ibmr.length; + } - /* check vaddr is 8 bytes aligned. */ - if (!vaddr || (uintptr_t)vaddr & 7) { - ret = RESPST_ERR_MISALIGNED_ATOMIC; - goto out; + if (res->flush.type & IB_FLUSH_PERSISTENT) { + if (rxe_flush_pmem_iova(mr, start, length)) + return RESPST_ERR_RKEY_VIOLATION; + /* Make data persistent. */ + wmb(); + } else if (res->flush.type & IB_FLUSH_GLOBAL) { + /* Make data global visibility. 
*/ + wmb(); } - spin_lock_bh(&atomic_ops_lock); + qp->resp.msn++; + + /* next expected psn, read handles this separately */ + qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + qp->resp.ack_psn = qp->resp.psn; + + qp->resp.opcode = pkt->opcode; + qp->resp.status = IB_WC_SUCCESS; - qp->resp.atomic_orig = *vaddr; + return RESPST_ACKNOWLEDGE; +} - if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP || - pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) { - if (*vaddr == atmeth_comp(pkt)) - *vaddr = atmeth_swap_add(pkt); - } else { - *vaddr += atmeth_swap_add(pkt); +static enum resp_states atomic_reply(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + struct rxe_mr *mr = qp->resp.mr; + struct resp_res *res = qp->resp.res; + int err; + + if (!res) { + res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK); + qp->resp.res = res; } - spin_unlock_bh(&atomic_ops_lock); + if (!res->replay) { + u64 iova = qp->resp.va + qp->resp.offset; - ret = RESPST_NONE; -out: - return ret; + if (is_odp_mr(mr)) + err = rxe_odp_atomic_op(mr, iova, pkt->opcode, + atmeth_comp(pkt), + atmeth_swap_add(pkt), + &res->atomic.orig_val); + else + err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode, + atmeth_comp(pkt), + atmeth_swap_add(pkt), + &res->atomic.orig_val); + if (err) + return err; + + qp->resp.msn++; + + /* next expected psn, read handles this separately */ + qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + qp->resp.ack_psn = qp->resp.psn; + + qp->resp.opcode = pkt->opcode; + qp->resp.status = IB_WC_SUCCESS; + } + + return RESPST_ACKNOWLEDGE; +} + +static enum resp_states atomic_write_reply(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + struct resp_res *res = qp->resp.res; + struct rxe_mr *mr; + u64 value; + u64 iova; + int err; + + if (!res) { + res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK); + qp->resp.res = res; + } + + if (res->replay) + return RESPST_ACKNOWLEDGE; + + mr = qp->resp.mr; + value = *(u64 *)payload_addr(pkt); + iova = qp->resp.va + qp->resp.offset; + + /* See IBA oA19-28 */ + if (unlikely(mr->state != RXE_MR_STATE_VALID)) { + rxe_dbg_mr(mr, "mr not in valid state\n"); + return RESPST_ERR_RKEY_VIOLATION; + } + + if (is_odp_mr(mr)) + err = rxe_odp_do_atomic_write(mr, iova, value); + else + err = rxe_mr_do_atomic_write(mr, iova, value); + if (err) + return err; + + qp->resp.resid = 0; + qp->resp.msn++; + + /* next expected psn, read handles this separately */ + qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + qp->resp.ack_psn = qp->resp.psn; + + qp->resp.opcode = pkt->opcode; + qp->resp.status = IB_WC_SUCCESS; + + return RESPST_ACKNOWLEDGE; } static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, - struct rxe_pkt_info *pkt, struct rxe_pkt_info *ack, int opcode, int payload, u32 psn, - u8 syndrome, - u32 *crcp) + u8 syndrome) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct sk_buff *skb; - u32 crc = 0; - u32 *p; int paylen; int pad; int err; @@ -649,23 +813,68 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, } if (ack->mask & RXE_ATMACK_MASK) - atmack_set_orig(ack, qp->resp.atomic_orig); + atmack_set_orig(ack, qp->resp.res->atomic.orig_val); - err = rxe_prepare(ack, skb, &crc); + err = rxe_prepare(&qp->pri_av, ack, skb); if (err) { kfree_skb(skb); return NULL; } - if (crcp) { - /* CRC computation will be continued by the caller */ - *crcp = crc; - } else { - p = payload_addr(ack) + payload + bth_pad(ack); - *p = ~crc; + return skb; +} + +/** + * rxe_recheck_mr - revalidate MR from rkey and get a reference + * @qp: the qp + * @rkey: the rkey + * + * This code allows the MR to be invalidated 
or deregistered or + * the MW if one was used to be invalidated or deallocated. + * It is assumed that the access permissions if originally good + * are OK and the mappings to be unchanged. + * + * TODO: If someone reregisters an MR to change its size or + * access permissions during the processing of an RDMA read + * we should kill the responder resource and complete the + * operation with an error. + * + * Return: mr on success else NULL + */ +static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct rxe_mr *mr; + struct rxe_mw *mw; + + if (rkey_is_mw(rkey)) { + mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8); + if (!mw) + return NULL; + + mr = mw->mr; + if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID || + !mr || mr->state != RXE_MR_STATE_VALID) { + rxe_put(mw); + return NULL; + } + + rxe_get(mr); + rxe_put(mw); + + return mr; } - return skb; + mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8); + if (!mr) + return NULL; + + if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) { + rxe_put(mr); + return NULL; + } + + return mr; } /* RDMA read response. If res is not NULL, then we have a current RDMA request @@ -682,55 +891,40 @@ static enum resp_states read_reply(struct rxe_qp *qp, int opcode; int err; struct resp_res *res = qp->resp.res; - u32 icrc; - u32 *p; + struct rxe_mr *mr; if (!res) { - /* This is the first time we process that request. Get a - * resource - */ - res = &qp->resp.resources[qp->resp.res_head]; - - free_rd_atomic_resource(qp, res); - rxe_advance_resp_resource(qp); - - res->type = RXE_READ_MASK; - res->replay = 0; - - res->read.va = qp->resp.va + - qp->resp.offset; - res->read.va_org = qp->resp.va + - qp->resp.offset; - - res->first_psn = req_pkt->psn; + res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK); + qp->resp.res = res; + } - if (reth_len(req_pkt)) { - res->last_psn = (req_pkt->psn + - (reth_len(req_pkt) + mtu - 1) / - mtu - 1) & BTH_PSN_MASK; + if (res->state == rdatm_res_state_new) { + if (!res->replay || qp->resp.length == 0) { + /* if length == 0 mr will be NULL (is ok) + * otherwise qp->resp.mr holds a ref on mr + * which we transfer to mr and drop below. + */ + mr = qp->resp.mr; + qp->resp.mr = NULL; } else { - res->last_psn = res->first_psn; + mr = rxe_recheck_mr(qp, res->read.rkey); + if (!mr) + return RESPST_ERR_RKEY_VIOLATION; } - res->cur_psn = req_pkt->psn; - - res->read.resid = qp->resp.resid; - res->read.length = qp->resp.resid; - res->read.rkey = qp->resp.rkey; - - /* note res inherits the reference to mr from qp */ - res->read.mr = qp->resp.mr; - qp->resp.mr = NULL; - qp->resp.res = res; - res->state = rdatm_res_state_new; - } - - if (res->state == rdatm_res_state_new) { if (res->read.resid <= mtu) opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY; else opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST; } else { + /* re-lookup mr from rkey on all later packets. + * length will be non-zero. This can fail if someone + * modifies or destroys the mr since the first packet. 
+ */ + mr = rxe_recheck_mr(qp, res->read.rkey); + if (!mr) + return RESPST_ERR_RKEY_VIOLATION; + if (res->read.resid > mtu) opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE; else @@ -741,30 +935,32 @@ static enum resp_states read_reply(struct rxe_qp *qp, payload = min_t(int, res->read.resid, mtu); - skb = prepare_ack_packet(qp, req_pkt, &ack_pkt, opcode, payload, - res->cur_psn, AETH_ACK_UNLIMITED, &icrc); - if (!skb) - return RESPST_ERR_RNR; + skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload, + res->cur_psn, AETH_ACK_UNLIMITED); + if (!skb) { + state = RESPST_ERR_RNR; + goto err_out; + } - err = rxe_mr_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt), - payload, RXE_FROM_MR_OBJ, &icrc); - if (err) - pr_err("Failed copying memory\n"); + err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt), + payload, RXE_FROM_MR_OBJ); + if (err) { + kfree_skb(skb); + state = RESPST_ERR_RKEY_VIOLATION; + goto err_out; + } if (bth_pad(&ack_pkt)) { - struct rxe_dev *rxe = to_rdev(qp->ibqp.device); u8 *pad = payload_addr(&ack_pkt) + payload; memset(pad, 0, bth_pad(&ack_pkt)); - icrc = rxe_crc32(rxe, icrc, pad, bth_pad(&ack_pkt)); } - p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt); - *p = ~icrc; + /* rxe_xmit_packet always consumes the skb */ err = rxe_xmit_packet(qp, &ack_pkt, skb); if (err) { - pr_err("Failed sending RDMA reply.\n"); - return RESPST_ERR_RNR; + state = RESPST_ERR_RNR; + goto err_out; } res->read.va += payload; @@ -782,6 +978,9 @@ static enum resp_states read_reply(struct rxe_qp *qp, state = RESPST_CLEANUP; } +err_out: + if (mr) + rxe_put(mr); return state; } @@ -804,7 +1003,6 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) if (pkt->mask & RXE_SEND_MASK) { if (qp_type(qp) == IB_QPT_UD || - qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) { if (skb->protocol == htons(ETH_P_IP)) { memset(&hdr.reserved, 0, @@ -831,9 +1029,11 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) qp->resp.msn++; return RESPST_READ_REPLY; } else if (pkt->mask & RXE_ATOMIC_MASK) { - err = process_atomic(qp, pkt); - if (err) - return err; + return RESPST_ATOMIC_REPLY; + } else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) { + return RESPST_ATOMIC_WRITE_REPLY; + } else if (pkt->mask & RXE_FLUSH_MASK) { + return RESPST_PROCESS_FLUSH; } else { /* Unreachable */ WARN_ON_ONCE(1); @@ -847,6 +1047,10 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) return RESPST_ERR_INVALIDATE_RKEY; } + if (pkt->mask & RXE_END_MASK) + /* We successfully processed this new request. */ + qp->resp.msn++; + /* next expected psn, read handles this separately */ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; qp->resp.ack_psn = qp->resp.psn; @@ -854,11 +1058,9 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) qp->resp.opcode = pkt->opcode; qp->resp.status = IB_WC_SUCCESS; - if (pkt->mask & RXE_COMP_MASK) { - /* We successfully processed this new request. 
*/ - qp->resp.msn++; + if (pkt->mask & RXE_COMP_MASK) return RESPST_COMPLETE; - } else if (qp_type(qp) == IB_QPT_RC) + else if (qp_type(qp) == IB_QPT_RC) return RESPST_ACKNOWLEDGE; else return RESPST_CLEANUP; @@ -872,6 +1074,7 @@ static enum resp_states do_complete(struct rxe_qp *qp, struct ib_uverbs_wc *uwc = &cqe.uibwc; struct rxe_recv_wqe *wqe = qp->resp.wqe; struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + unsigned long flags; if (!wqe) goto finish; @@ -893,7 +1096,6 @@ static enum resp_states do_complete(struct rxe_qp *qp, wc->opcode = (pkt->mask & RXE_IMMDT_MASK && pkt->mask & RXE_WRITE_MASK) ? IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV; - wc->vendor_err = 0; wc->byte_len = (pkt->mask & RXE_IMMDT_MASK && pkt->mask & RXE_WRITE_MASK) ? qp->resp.length : wqe->dma.length - wqe->dma.resid; @@ -914,8 +1116,6 @@ static enum resp_states do_complete(struct rxe_qp *qp, uwc->ex.invalidate_rkey = ieth_rkey(pkt); } - uwc->qp_num = qp->ibqp.qp_num; - if (pkt->mask & RXE_DETH_MASK) uwc->src_qp = deth_sqp(pkt); @@ -947,18 +1147,17 @@ static enum resp_states do_complete(struct rxe_qp *qp, if (pkt->mask & RXE_DETH_MASK) wc->src_qp = deth_sqp(pkt); - wc->qp = &qp->ibqp; wc->port_num = qp->attr.port_num; } + } else { + if (wc->status != IB_WC_WR_FLUSH_ERR) + rxe_err_qp(qp, "non-flush error status = %d\n", + wc->status); } /* have copy for srq and reference for !srq */ - if (!qp->srq) { - if (qp->is_user) - advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_USER); - else - advance_consumer(qp->rq.queue, QUEUE_TYPE_KERNEL); - } + if (!qp->srq) + queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT); qp->resp.wqe = NULL; @@ -966,8 +1165,13 @@ static enum resp_states do_complete(struct rxe_qp *qp, return RESPST_ERR_CQ_OVERFLOW; finish: - if (unlikely(qp->resp.state == QP_STATE_ERROR)) + spin_lock_irqsave(&qp->state_lock, flags); + if (unlikely(qp_state(qp) == IB_QPS_ERR)) { + spin_unlock_irqrestore(&qp->state_lock, flags); return RESPST_CHK_RESOURCE; + } + spin_unlock_irqrestore(&qp->state_lock, flags); + if (unlikely(!pkt)) return RESPST_DONE; if (qp_type(qp) == IB_QPT_RC) @@ -976,62 +1180,54 @@ finish: return RESPST_CLEANUP; } -static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, - u8 syndrome, u32 psn) + +static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn, + int opcode, const char *msg) { - int err = 0; + int err; struct rxe_pkt_info ack_pkt; struct sk_buff *skb; - skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE, - 0, psn, syndrome, NULL); - if (!skb) { - err = -ENOMEM; - goto err1; - } + skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome); + if (!skb) + return -ENOMEM; err = rxe_xmit_packet(qp, &ack_pkt, skb); if (err) - pr_err_ratelimited("Failed sending ack\n"); + rxe_dbg_qp(qp, "Failed sending %s\n", msg); -err1: return err; } -static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, - u8 syndrome) +static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) { - int rc = 0; - struct rxe_pkt_info ack_pkt; - struct sk_buff *skb; - struct resp_res *res; + return send_common_ack(qp, syndrome, psn, + IB_OPCODE_RC_ACKNOWLEDGE, "ACK"); +} - skb = prepare_ack_packet(qp, pkt, &ack_pkt, - IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, 0, pkt->psn, - syndrome, NULL); - if (!skb) { - rc = -ENOMEM; - goto out; - } +static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) +{ + int ret = send_common_ack(qp, syndrome, psn, + IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, "ATOMIC ACK"); - res = &qp->resp.resources[qp->resp.res_head]; - 
free_rd_atomic_resource(qp, res); - rxe_advance_resp_resource(qp); + /* have to clear this since it is used to trigger + * long read replies + */ + qp->resp.res = NULL; + return ret; +} - skb_get(skb); - res->type = RXE_ATOMIC_MASK; - res->atomic.skb = skb; - res->first_psn = ack_pkt.psn; - res->last_psn = ack_pkt.psn; - res->cur_psn = ack_pkt.psn; - - rc = rxe_xmit_packet(qp, &ack_pkt, skb); - if (rc) { - pr_err_ratelimited("Failed sending ack\n"); - rxe_drop_ref(qp); - } -out: - return rc; +static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) +{ + int ret = send_common_ack(qp, syndrome, psn, + IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY, + "RDMA READ response of length zero ACK"); + + /* have to clear this since it is used to trigger + * long read replies + */ + qp->resp.res = NULL; + return ret; } static enum resp_states acknowledge(struct rxe_qp *qp, @@ -1041,11 +1237,13 @@ static enum resp_states acknowledge(struct rxe_qp *qp, return RESPST_CLEANUP; if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED) - send_ack(qp, pkt, qp->resp.aeth_syndrome, pkt->psn); + send_ack(qp, qp->resp.aeth_syndrome, pkt->psn); else if (pkt->mask & RXE_ATOMIC_MASK) - send_atomic_ack(qp, pkt, AETH_ACK_UNLIMITED); + send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); + else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK)) + send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); else if (bth_ack(pkt)) - send_ack(qp, pkt, AETH_ACK_UNLIMITED, pkt->psn); + send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); return RESPST_CLEANUP; } @@ -1057,13 +1255,13 @@ static enum resp_states cleanup(struct rxe_qp *qp, if (pkt) { skb = skb_dequeue(&qp->req_pkts); - rxe_drop_ref(qp); + rxe_put(qp); kfree_skb(skb); ib_device_put(qp->ibqp.device); } if (qp->resp.mr) { - rxe_drop_ref(qp->resp.mr); + rxe_put(qp->resp.mr); qp->resp.mr = NULL; } @@ -1098,8 +1296,24 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, if (pkt->mask & RXE_SEND_MASK || pkt->mask & RXE_WRITE_MASK) { /* SEND. Ack again and cleanup. C9-105. */ - send_ack(qp, pkt, AETH_ACK_UNLIMITED, prev_psn); + send_ack(qp, AETH_ACK_UNLIMITED, prev_psn); return RESPST_CLEANUP; + } else if (pkt->mask & RXE_FLUSH_MASK) { + struct resp_res *res; + + /* Find the operation in our list of responder resources. */ + res = find_resource(qp, pkt->psn); + if (res) { + res->replay = 1; + res->cur_psn = pkt->psn; + qp->resp.res = res; + rc = RESPST_PROCESS_FLUSH; + goto out; + } + + /* Resource not found. Class D error. Drop the request. */ + rc = RESPST_CLEANUP; + goto out; } else if (pkt->mask & RXE_READ_MASK) { struct resp_res *res; @@ -1152,14 +1366,13 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, /* Find the operation in our list of responder resources. */ res = find_resource(qp, pkt->psn); if (res) { - skb_get(res->atomic.skb); - /* Resend the result. */ - rc = rxe_xmit_packet(qp, pkt, res->atomic.skb); - if (rc) { - pr_err("Failed resending result. This flow is not handled - skb ignored\n"); - rc = RESPST_CLEANUP; - goto out; - } + res->replay = 1; + res->cur_psn = pkt->psn; + qp->resp.res = res; + rc = pkt->mask & RXE_ATOMIC_MASK ? + RESPST_ATOMIC_REPLY : + RESPST_ATOMIC_WRITE_REPLY; + goto out; } /* Resource not found. Class D error. Drop the request. 
*/ @@ -1207,7 +1420,7 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp) } if (qp->resp.mr) { - rxe_drop_ref(qp->resp.mr); + rxe_put(qp->resp.mr); qp->resp.mr = NULL; } @@ -1215,54 +1428,107 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp) } } -static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify) +/* drain incoming request packet queue */ +static void drain_req_pkts(struct rxe_qp *qp) { struct sk_buff *skb; - struct rxe_queue *q = qp->rq.queue; while ((skb = skb_dequeue(&qp->req_pkts))) { - rxe_drop_ref(qp); + rxe_put(qp); kfree_skb(skb); ib_device_put(qp->ibqp.device); } +} - if (notify) - return; +/* complete receive wqe with flush error */ +static int flush_recv_wqe(struct rxe_qp *qp, struct rxe_recv_wqe *wqe) +{ + struct rxe_cqe cqe = {}; + struct ib_wc *wc = &cqe.ibwc; + struct ib_uverbs_wc *uwc = &cqe.uibwc; + int err; - while (!qp->srq && q && queue_head(q, q->type)) - advance_consumer(q, q->type); + if (qp->rcq->is_user) { + uwc->wr_id = wqe->wr_id; + uwc->status = IB_WC_WR_FLUSH_ERR; + uwc->qp_num = qp_num(qp); + } else { + wc->wr_id = wqe->wr_id; + wc->status = IB_WC_WR_FLUSH_ERR; + wc->qp = &qp->ibqp; + } + + err = rxe_cq_post(qp->rcq, &cqe, 0); + if (err) + rxe_dbg_cq(qp->rcq, "post cq failed err = %d\n", err); + + return err; } -int rxe_responder(void *arg) +/* drain and optionally complete the recive queue + * if unable to complete a wqe stop completing and + * just flush the remaining wqes + */ +static void flush_recv_queue(struct rxe_qp *qp, bool notify) { - struct rxe_qp *qp = (struct rxe_qp *)arg; - struct rxe_dev *rxe = to_rdev(qp->ibqp.device); - enum resp_states state; - struct rxe_pkt_info *pkt = NULL; - int ret = 0; + struct rxe_queue *q = qp->rq.queue; + struct rxe_recv_wqe *wqe; + int err; - rxe_add_ref(qp); + if (qp->srq) { + if (notify && qp->ibqp.event_handler) { + struct ib_event ev; - qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED; + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } + return; + } - if (!qp->valid) { - ret = -EINVAL; - goto done; + /* recv queue not created. nothing to do. 
*/ + if (!qp->rq.queue) + return; + + while ((wqe = queue_head(q, q->type))) { + if (notify) { + err = flush_recv_wqe(qp, wqe); + if (err) + notify = 0; + } + queue_advance_consumer(q, q->type); } - switch (qp->resp.state) { - case QP_STATE_RESET: - state = RESPST_RESET; - break; + qp->resp.wqe = NULL; +} - default: - state = RESPST_GET_REQ; - break; +int rxe_receiver(struct rxe_qp *qp) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + enum resp_states state; + struct rxe_pkt_info *pkt = NULL; + int ret; + unsigned long flags; + + spin_lock_irqsave(&qp->state_lock, flags); + if (!qp->valid || qp_state(qp) == IB_QPS_ERR || + qp_state(qp) == IB_QPS_RESET) { + bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR); + + drain_req_pkts(qp); + flush_recv_queue(qp, notify); + spin_unlock_irqrestore(&qp->state_lock, flags); + goto exit; } + spin_unlock_irqrestore(&qp->state_lock, flags); + + qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED; + + state = RESPST_GET_REQ; while (1) { - pr_debug("qp#%d state = %s\n", qp_num(qp), - resp_state_name[state]); + rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]); switch (state) { case RESPST_GET_REQ: state = get_req(qp, &pkt); @@ -1280,7 +1546,7 @@ int rxe_responder(void *arg) state = check_resource(qp, pkt); break; case RESPST_CHK_LENGTH: - state = check_length(qp, pkt); + state = rxe_resp_check_length(qp, pkt); break; case RESPST_CHK_RKEY: state = check_rkey(qp, pkt); @@ -1294,6 +1560,15 @@ int rxe_responder(void *arg) case RESPST_READ_REPLY: state = read_reply(qp, pkt); break; + case RESPST_ATOMIC_REPLY: + state = atomic_reply(qp, pkt); + break; + case RESPST_ATOMIC_WRITE_REPLY: + state = atomic_write_reply(qp, pkt); + break; + case RESPST_PROCESS_FLUSH: + state = process_flush(qp, pkt); + break; case RESPST_ACKNOWLEDGE: state = acknowledge(qp, pkt); break; @@ -1305,7 +1580,7 @@ int rxe_responder(void *arg) break; case RESPST_ERR_PSN_OUT_OF_SEQ: /* RC only - Class B. Drop packet. */ - send_ack(qp, pkt, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn); + send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn); state = RESPST_CLEANUP; break; @@ -1327,7 +1602,7 @@ int rxe_responder(void *arg) if (qp_type(qp) == IB_QPT_RC) { rxe_counter_inc(rxe, RXE_CNT_SND_RNR); /* RC - class B */ - send_ack(qp, pkt, AETH_RNR_NAK | + send_ack(qp, AETH_RNR_NAK | (~AETH_TYPE_MASK & qp->attr.min_rnr_timer), pkt->psn); @@ -1409,14 +1684,9 @@ int rxe_responder(void *arg) goto exit; - case RESPST_RESET: - rxe_drain_req_pkts(qp, false); - qp->resp.wqe = NULL; - goto exit; - case RESPST_ERROR: qp->resp.goto_error = 0; - pr_warn("qp#%d moved to error state\n", qp_num(qp)); + rxe_dbg_qp(qp, "moved to error state\n"); rxe_qp_error(qp); goto exit; @@ -1425,9 +1695,15 @@ int rxe_responder(void *arg) } } + /* A non-zero return value will cause rxe_do_task to + * exit its loop and end the work item. A zero return + * will continue looping and return to rxe_responder + */ +done: + ret = 0; + goto out; exit: ret = -EAGAIN; -done: - rxe_drop_ref(qp); +out: return ret; } |
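One behavioural change in the diff above worth calling out: duplicate atomic requests are no longer answered by retransmitting a cached skb (the old res->atomic.skb). Instead, duplicate_request() marks the matching responder resource with res->replay = 1 and re-enters RESPST_ATOMIC_REPLY (or RESPST_ATOMIC_WRITE_REPLY), where atomic_reply() skips re-executing the operation and the ACK is rebuilt from the stored res->atomic.orig_val. The sketch below is a simplified user-space model of that replay flow, not kernel code; struct resp_res, mark_replay() and the atomic op are pared down for illustration.

/*
 * Simplified model of the responder's atomic replay path after this
 * series: the operation runs once, its original value is cached in the
 * responder resource, and a duplicate request is answered from that
 * cache instead of re-executing the atomic op.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct resp_res {
	bool valid;		/* resource holds a completed atomic op */
	bool replay;		/* set when a duplicate PSN matched this resource */
	uint32_t psn;
	uint64_t orig_val;	/* value carried in the atomic ACK */
};

/* model of atomic_reply(): execute only on first delivery */
static uint64_t atomic_reply(struct resp_res *res, uint64_t *target,
			     uint64_t swap_add, uint32_t psn)
{
	if (!res->replay) {
		/* first time: do the fetch-and-add and cache the result */
		res->orig_val = *target;
		*target += swap_add;
		res->valid = true;
		res->psn = psn;
	}
	/* replay or not, the ACK carries the cached original value */
	return res->orig_val;
}

/* model of duplicate_request(): find the resource and mark it replayed */
static bool mark_replay(struct resp_res *res, uint32_t psn)
{
	if (res->valid && res->psn == psn) {
		res->replay = true;
		return true;
	}
	return false;	/* class D error in the real code: drop the packet */
}

int main(void)
{
	uint64_t mem = 100;
	struct resp_res res = { 0 };

	/* first delivery of FETCH_ADD(+5) with PSN 7 */
	printf("ack orig = %llu, mem = %llu\n",
	       (unsigned long long)atomic_reply(&res, &mem, 5, 7),
	       (unsigned long long)mem);

	/* duplicate of PSN 7: re-ack without touching memory again */
	if (mark_replay(&res, 7))
		printf("ack orig = %llu, mem = %llu\n",
		       (unsigned long long)atomic_reply(&res, &mem, 5, 7),
		       (unsigned long long)mem);

	return 0;
}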
