summaryrefslogtreecommitdiff
path: root/drivers/infiniband/sw/rxe/rxe_resp.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/sw/rxe/rxe_resp.c')
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c978
1 files changed, 627 insertions, 351 deletions
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 3743dc39b60c..711f73e0bbb1 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -10,40 +10,6 @@
#include "rxe_loc.h"
#include "rxe_queue.h"
-enum resp_states {
- RESPST_NONE,
- RESPST_GET_REQ,
- RESPST_CHK_PSN,
- RESPST_CHK_OP_SEQ,
- RESPST_CHK_OP_VALID,
- RESPST_CHK_RESOURCE,
- RESPST_CHK_LENGTH,
- RESPST_CHK_RKEY,
- RESPST_EXECUTE,
- RESPST_READ_REPLY,
- RESPST_COMPLETE,
- RESPST_ACKNOWLEDGE,
- RESPST_CLEANUP,
- RESPST_DUPLICATE_REQUEST,
- RESPST_ERR_MALFORMED_WQE,
- RESPST_ERR_UNSUPPORTED_OPCODE,
- RESPST_ERR_MISALIGNED_ATOMIC,
- RESPST_ERR_PSN_OUT_OF_SEQ,
- RESPST_ERR_MISSING_OPCODE_FIRST,
- RESPST_ERR_MISSING_OPCODE_LAST_C,
- RESPST_ERR_MISSING_OPCODE_LAST_D1E,
- RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
- RESPST_ERR_RNR,
- RESPST_ERR_RKEY_VIOLATION,
- RESPST_ERR_INVALIDATE_RKEY,
- RESPST_ERR_LENGTH,
- RESPST_ERR_CQ_OVERFLOW,
- RESPST_ERROR,
- RESPST_RESET,
- RESPST_DONE,
- RESPST_EXIT,
-};
-
static char *resp_state_name[] = {
[RESPST_NONE] = "NONE",
[RESPST_GET_REQ] = "GET_REQ",
@@ -55,6 +21,9 @@ static char *resp_state_name[] = {
[RESPST_CHK_RKEY] = "CHK_RKEY",
[RESPST_EXECUTE] = "EXECUTE",
[RESPST_READ_REPLY] = "READ_REPLY",
+ [RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY",
+ [RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY",
+ [RESPST_PROCESS_FLUSH] = "PROCESS_FLUSH",
[RESPST_COMPLETE] = "COMPLETE",
[RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
[RESPST_CLEANUP] = "CLEANUP",
@@ -73,7 +42,6 @@ static char *resp_state_name[] = {
[RESPST_ERR_LENGTH] = "ERR_LENGTH",
[RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW",
[RESPST_ERROR] = "ERROR",
- [RESPST_RESET] = "RESET",
[RESPST_DONE] = "DONE",
[RESPST_EXIT] = "EXIT",
};
@@ -81,15 +49,8 @@ static char *resp_state_name[] = {
/* rxe_recv calls here to add a request packet to the input queue */
void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
{
- int must_sched;
- struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
-
skb_queue_tail(&qp->req_pkts, skb);
-
- must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
- (skb_queue_len(&qp->req_pkts) > 1);
-
- rxe_run_task(&qp->resp.task, must_sched);
+ rxe_sched_task(&qp->recv_task);
}
static inline enum resp_states get_req(struct rxe_qp *qp,
@@ -97,17 +58,6 @@ static inline enum resp_states get_req(struct rxe_qp *qp,
{
struct sk_buff *skb;
- if (qp->resp.state == QP_STATE_ERROR) {
- while ((skb = skb_dequeue(&qp->req_pkts))) {
- rxe_drop_ref(qp);
- kfree_skb(skb);
- ib_device_put(qp->ibqp.device);
- }
-
- /* go drain recv wr queue */
- return RESPST_CHK_RESOURCE;
- }
-
skb = skb_peek(&qp->req_pkts);
if (!skb)
return RESPST_EXIT;
@@ -251,19 +201,37 @@ static enum resp_states check_op_seq(struct rxe_qp *qp,
}
}
+static bool check_qp_attr_access(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ if (((pkt->mask & RXE_READ_MASK) &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
+ ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
+ ((pkt->mask & RXE_ATOMIC_MASK) &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+ return false;
+
+ if (pkt->mask & RXE_FLUSH_MASK) {
+ u32 flush_type = feth_plt(pkt);
+
+ if ((flush_type & IB_FLUSH_GLOBAL &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) ||
+ (flush_type & IB_FLUSH_PERSISTENT &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT)))
+ return false;
+ }
+
+ return true;
+}
+
static enum resp_states check_op_valid(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
switch (qp_type(qp)) {
case IB_QPT_RC:
- if (((pkt->mask & RXE_READ_MASK) &&
- !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
- ((pkt->mask & RXE_WRITE_MASK) &&
- !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
- ((pkt->mask & RXE_ATOMIC_MASK) &&
- !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) {
+ if (!check_qp_attr_access(qp, pkt))
return RESPST_ERR_UNSUPPORTED_OPCODE;
- }
break;
@@ -277,7 +245,6 @@ static enum resp_states check_op_valid(struct rxe_qp *qp,
break;
case IB_QPT_UD:
- case IB_QPT_SMI:
case IB_QPT_GSI:
break;
@@ -297,49 +264,42 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
struct ib_event ev;
unsigned int count;
size_t size;
+ unsigned long flags;
if (srq->error)
return RESPST_ERR_RNR;
- spin_lock_bh(&srq->rq.consumer_lock);
+ spin_lock_irqsave(&srq->rq.consumer_lock, flags);
- if (qp->is_user)
- wqe = queue_head(q, QUEUE_TYPE_FROM_USER);
- else
- wqe = queue_head(q, QUEUE_TYPE_KERNEL);
+ wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
if (!wqe) {
- spin_unlock_bh(&srq->rq.consumer_lock);
+ spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
return RESPST_ERR_RNR;
}
/* don't trust user space data */
if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
- spin_unlock_bh(&srq->rq.consumer_lock);
- pr_warn("%s: invalid num_sge in SRQ entry\n", __func__);
+ spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
+ rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n");
return RESPST_ERR_MALFORMED_WQE;
}
- size = sizeof(wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge);
+ size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge);
memcpy(&qp->resp.srq_wqe, wqe, size);
qp->resp.wqe = &qp->resp.srq_wqe.wqe;
- if (qp->is_user) {
- advance_consumer(q, QUEUE_TYPE_FROM_USER);
- count = queue_count(q, QUEUE_TYPE_FROM_USER);
- } else {
- advance_consumer(q, QUEUE_TYPE_KERNEL);
- count = queue_count(q, QUEUE_TYPE_KERNEL);
- }
+ queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT);
+ count = queue_count(q, QUEUE_TYPE_FROM_CLIENT);
if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) {
srq->limit = 0;
goto event;
}
- spin_unlock_bh(&srq->rq.consumer_lock);
+ spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
return RESPST_CHK_LENGTH;
event:
- spin_unlock_bh(&srq->rq.consumer_lock);
+ spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
ev.device = qp->ibqp.device;
ev.element.srq = qp->ibqp.srq;
ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
@@ -352,29 +312,7 @@ static enum resp_states check_resource(struct rxe_qp *qp,
{
struct rxe_srq *srq = qp->srq;
- if (qp->resp.state == QP_STATE_ERROR) {
- if (qp->resp.wqe) {
- qp->resp.status = IB_WC_WR_FLUSH_ERR;
- return RESPST_COMPLETE;
- } else if (!srq) {
- if (qp->is_user)
- qp->resp.wqe = queue_head(qp->rq.queue,
- QUEUE_TYPE_FROM_USER);
- else
- qp->resp.wqe = queue_head(qp->rq.queue,
- QUEUE_TYPE_KERNEL);
- if (qp->resp.wqe) {
- qp->resp.status = IB_WC_WR_FLUSH_ERR;
- return RESPST_COMPLETE;
- } else {
- return RESPST_EXIT;
- }
- } else {
- return RESPST_EXIT;
- }
- }
-
- if (pkt->mask & RXE_READ_OR_ATOMIC) {
+ if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
/* it is the requesters job to not send
* too many read/atomic ops, we just
* recycle the responder resource queue
@@ -389,33 +327,106 @@ static enum resp_states check_resource(struct rxe_qp *qp,
if (srq)
return get_srq_wqe(qp);
- if (qp->is_user)
- qp->resp.wqe = queue_head(qp->rq.queue,
- QUEUE_TYPE_FROM_USER);
- else
- qp->resp.wqe = queue_head(qp->rq.queue,
- QUEUE_TYPE_KERNEL);
+ qp->resp.wqe = queue_head(qp->rq.queue,
+ QUEUE_TYPE_FROM_CLIENT);
return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
}
return RESPST_CHK_LENGTH;
}
-static enum resp_states check_length(struct rxe_qp *qp,
- struct rxe_pkt_info *pkt)
+static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
{
- switch (qp_type(qp)) {
- case IB_QPT_RC:
- return RESPST_CHK_RKEY;
+ /*
+ * See IBA C9-92
+ * For UD QPs we only check if the packet will fit in the
+ * receive buffer later. For RDMA operations additional
+ * length checks are performed in check_rkey.
+ */
+ if ((qp_type(qp) == IB_QPT_GSI) || (qp_type(qp) == IB_QPT_UD)) {
+ unsigned int payload = payload_size(pkt);
+ unsigned int recv_buffer_len = 0;
+ int i;
+
+ for (i = 0; i < qp->resp.wqe->dma.num_sge; i++)
+ recv_buffer_len += qp->resp.wqe->dma.sge[i].length;
+ if (payload + sizeof(union rdma_network_hdr) > recv_buffer_len) {
+ rxe_dbg_qp(qp, "The receive buffer is too small for this UD packet.\n");
+ return RESPST_ERR_LENGTH;
+ }
+ }
- case IB_QPT_UC:
- return RESPST_CHK_RKEY;
+ if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
+ (qp_type(qp) == IB_QPT_UC))) {
+ unsigned int mtu = qp->mtu;
+ unsigned int payload = payload_size(pkt);
- default:
- return RESPST_CHK_RKEY;
+ if ((pkt->mask & RXE_START_MASK) &&
+ (pkt->mask & RXE_END_MASK)) {
+ if (unlikely(payload > mtu)) {
+ rxe_dbg_qp(qp, "only packet too long\n");
+ return RESPST_ERR_LENGTH;
+ }
+ } else if ((pkt->mask & RXE_START_MASK) ||
+ (pkt->mask & RXE_MIDDLE_MASK)) {
+ if (unlikely(payload != mtu)) {
+ rxe_dbg_qp(qp, "first or middle packet not mtu\n");
+ return RESPST_ERR_LENGTH;
+ }
+ } else if (pkt->mask & RXE_END_MASK) {
+ if (unlikely((payload == 0) || (payload > mtu))) {
+ rxe_dbg_qp(qp, "last packet zero or too long\n");
+ return RESPST_ERR_LENGTH;
+ }
+ }
+ }
+
+ /* See IBA C9-94 */
+ if (pkt->mask & RXE_RETH_MASK) {
+ if (reth_len(pkt) > (1U << 31)) {
+ rxe_dbg_qp(qp, "dma length too long\n");
+ return RESPST_ERR_LENGTH;
+ }
}
+
+ if (pkt->mask & RXE_RDMA_OP_MASK)
+ return RESPST_CHK_RKEY;
+ else
+ return RESPST_EXECUTE;
+}
+
+/* if the reth length field is zero we can assume nothing
+ * about the rkey value and should not validate or use it.
+ * Instead set qp->resp.rkey to 0 which is an invalid rkey
+ * value since the minimum index part is 1.
+ */
+static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
+{
+ unsigned int length = reth_len(pkt);
+
+ qp->resp.va = reth_va(pkt);
+ qp->resp.offset = 0;
+ qp->resp.resid = length;
+ qp->resp.length = length;
+ if (pkt->mask & RXE_READ_OR_WRITE_MASK && length == 0)
+ qp->resp.rkey = 0;
+ else
+ qp->resp.rkey = reth_rkey(pkt);
}
+static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
+{
+ qp->resp.va = atmeth_va(pkt);
+ qp->resp.offset = 0;
+ qp->resp.rkey = atmeth_rkey(pkt);
+ qp->resp.resid = sizeof(u64);
+}
+
+/* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL
+ * if an invalid rkey is received or the rdma length is zero. For middle
+ * or last packets use the stored value of mr.
+ */
static enum resp_states check_rkey(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
@@ -427,32 +438,42 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
u32 pktlen;
int mtu = qp->mtu;
enum resp_states state;
- int access;
-
- if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) {
- if (pkt->mask & RXE_RETH_MASK) {
- qp->resp.va = reth_va(pkt);
- qp->resp.offset = 0;
- qp->resp.rkey = reth_rkey(pkt);
- qp->resp.resid = reth_len(pkt);
- qp->resp.length = reth_len(pkt);
- }
+ int access = 0;
+
+ /* parse RETH or ATMETH header for first/only packets
+ * for va, length, rkey, etc. or use current value for
+ * middle/last packets.
+ */
+ if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
+ if (pkt->mask & RXE_RETH_MASK)
+ qp_resp_from_reth(qp, pkt);
+
access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
: IB_ACCESS_REMOTE_WRITE;
+ } else if (pkt->mask & RXE_FLUSH_MASK) {
+ u32 flush_type = feth_plt(pkt);
+
+ if (pkt->mask & RXE_RETH_MASK)
+ qp_resp_from_reth(qp, pkt);
+
+ if (flush_type & IB_FLUSH_GLOBAL)
+ access |= IB_ACCESS_FLUSH_GLOBAL;
+ if (flush_type & IB_FLUSH_PERSISTENT)
+ access |= IB_ACCESS_FLUSH_PERSISTENT;
} else if (pkt->mask & RXE_ATOMIC_MASK) {
- qp->resp.va = atmeth_va(pkt);
- qp->resp.offset = 0;
- qp->resp.rkey = atmeth_rkey(pkt);
- qp->resp.resid = sizeof(u64);
+ qp_resp_from_atmeth(qp, pkt);
access = IB_ACCESS_REMOTE_ATOMIC;
} else {
- return RESPST_EXECUTE;
+ /* shouldn't happen */
+ WARN_ON(1);
}
- /* A zero-byte op is not required to set an addr or rkey. */
- if ((pkt->mask & (RXE_READ_MASK | RXE_WRITE_OR_SEND)) &&
- (pkt->mask & RXE_RETH_MASK) &&
- reth_len(pkt) == 0) {
+ /* A zero-byte read or write op is not required to
+ * set an addr or rkey. See C9-88
+ */
+ if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
+ (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) {
+ qp->resp.mr = NULL;
return RESPST_EXECUTE;
}
@@ -464,14 +485,14 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
if (rkey_is_mw(rkey)) {
mw = rxe_lookup_mw(qp, access, rkey);
if (!mw) {
- pr_err("%s: no MW matches rkey %#x\n", __func__, rkey);
+ rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey);
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
mr = mw->mr;
if (!mr) {
- pr_err("%s: MW doesn't have an MR\n", __func__);
+ rxe_dbg_qp(qp, "MW doesn't have an MR\n");
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
@@ -479,23 +500,33 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
if (mw->access & IB_ZERO_BASED)
qp->resp.offset = mw->addr;
- rxe_drop_ref(mw);
- rxe_add_ref(mr);
+ rxe_get(mr);
+ rxe_put(mw);
+ mw = NULL;
} else {
mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
if (!mr) {
- pr_err("%s: no MR matches rkey %#x\n", __func__, rkey);
+ rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey);
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
}
+ if (pkt->mask & RXE_FLUSH_MASK) {
+ /* FLUSH MR may not set va or resid
+ * no need to check range since we will flush whole mr
+ */
+ if (feth_sel(pkt) == IB_FLUSH_MR)
+ goto skip_check_range;
+ }
+
if (mr_check_range(mr, va + qp->resp.offset, resid)) {
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
- if (pkt->mask & RXE_WRITE_MASK) {
+skip_check_range:
+ if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
if (resid > mtu) {
if (pktlen != mtu || bth_pad(pkt)) {
state = RESPST_ERR_LENGTH;
@@ -522,10 +553,11 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
return RESPST_EXECUTE;
err:
+ qp->resp.mr = NULL;
if (mr)
- rxe_drop_ref(mr);
+ rxe_put(mr);
if (mw)
- rxe_drop_ref(mw);
+ rxe_put(mw);
return state;
}
@@ -536,7 +568,7 @@ static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
int err;
err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
- data_addr, data_len, RXE_TO_MR_OBJ, NULL);
+ data_addr, data_len, RXE_TO_MR_OBJ);
if (unlikely(err))
return (err == -ENOSPC) ? RESPST_ERR_LENGTH
: RESPST_ERR_MALFORMED_WQE;
@@ -552,7 +584,7 @@ static enum resp_states write_data_in(struct rxe_qp *qp,
int data_len = payload_size(pkt);
err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
- payload_addr(pkt), data_len, RXE_TO_MR_OBJ, NULL);
+ payload_addr(pkt), data_len, RXE_TO_MR_OBJ);
if (err) {
rc = RESPST_ERR_RKEY_VIOLATION;
goto out;
@@ -565,61 +597,193 @@ out:
return rc;
}
-/* Guarantee atomicity of atomic operations at the machine level. */
-static DEFINE_SPINLOCK(atomic_ops_lock);
+static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt,
+ int type)
+{
+ struct resp_res *res;
+ u32 pkts;
-static enum resp_states process_atomic(struct rxe_qp *qp,
+ res = &qp->resp.resources[qp->resp.res_head];
+ rxe_advance_resp_resource(qp);
+ free_rd_atomic_resource(res);
+
+ res->type = type;
+ res->replay = 0;
+
+ switch (type) {
+ case RXE_READ_MASK:
+ res->read.va = qp->resp.va + qp->resp.offset;
+ res->read.va_org = qp->resp.va + qp->resp.offset;
+ res->read.resid = qp->resp.resid;
+ res->read.length = qp->resp.resid;
+ res->read.rkey = qp->resp.rkey;
+
+ pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1);
+ res->first_psn = pkt->psn;
+ res->cur_psn = pkt->psn;
+ res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;
+
+ res->state = rdatm_res_state_new;
+ break;
+ case RXE_ATOMIC_MASK:
+ case RXE_ATOMIC_WRITE_MASK:
+ res->first_psn = pkt->psn;
+ res->last_psn = pkt->psn;
+ res->cur_psn = pkt->psn;
+ break;
+ case RXE_FLUSH_MASK:
+ res->flush.va = qp->resp.va + qp->resp.offset;
+ res->flush.length = qp->resp.length;
+ res->flush.type = feth_plt(pkt);
+ res->flush.level = feth_sel(pkt);
+ }
+
+ return res;
+}
+
+static enum resp_states process_flush(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
- u64 *vaddr;
- enum resp_states ret;
+ u64 length, start;
struct rxe_mr *mr = qp->resp.mr;
+ struct resp_res *res = qp->resp.res;
- if (mr->state != RXE_MR_STATE_VALID) {
- ret = RESPST_ERR_RKEY_VIOLATION;
- goto out;
+ /* oA19-14, oA19-15 */
+ if (res && res->replay)
+ return RESPST_ACKNOWLEDGE;
+ else if (!res) {
+ res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK);
+ qp->resp.res = res;
}
- vaddr = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, sizeof(u64));
+ if (res->flush.level == IB_FLUSH_RANGE) {
+ start = res->flush.va;
+ length = res->flush.length;
+ } else { /* level == IB_FLUSH_MR */
+ start = mr->ibmr.iova;
+ length = mr->ibmr.length;
+ }
- /* check vaddr is 8 bytes aligned. */
- if (!vaddr || (uintptr_t)vaddr & 7) {
- ret = RESPST_ERR_MISALIGNED_ATOMIC;
- goto out;
+ if (res->flush.type & IB_FLUSH_PERSISTENT) {
+ if (rxe_flush_pmem_iova(mr, start, length))
+ return RESPST_ERR_RKEY_VIOLATION;
+ /* Make data persistent. */
+ wmb();
+ } else if (res->flush.type & IB_FLUSH_GLOBAL) {
+ /* Make data global visibility. */
+ wmb();
}
- spin_lock_bh(&atomic_ops_lock);
+ qp->resp.msn++;
+
+ /* next expected psn, read handles this separately */
+ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ qp->resp.ack_psn = qp->resp.psn;
+
+ qp->resp.opcode = pkt->opcode;
+ qp->resp.status = IB_WC_SUCCESS;
- qp->resp.atomic_orig = *vaddr;
+ return RESPST_ACKNOWLEDGE;
+}
- if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP ||
- pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) {
- if (*vaddr == atmeth_comp(pkt))
- *vaddr = atmeth_swap_add(pkt);
- } else {
- *vaddr += atmeth_swap_add(pkt);
+static enum resp_states atomic_reply(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ struct rxe_mr *mr = qp->resp.mr;
+ struct resp_res *res = qp->resp.res;
+ int err;
+
+ if (!res) {
+ res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK);
+ qp->resp.res = res;
}
- spin_unlock_bh(&atomic_ops_lock);
+ if (!res->replay) {
+ u64 iova = qp->resp.va + qp->resp.offset;
- ret = RESPST_NONE;
-out:
- return ret;
+ if (is_odp_mr(mr))
+ err = rxe_odp_atomic_op(mr, iova, pkt->opcode,
+ atmeth_comp(pkt),
+ atmeth_swap_add(pkt),
+ &res->atomic.orig_val);
+ else
+ err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
+ atmeth_comp(pkt),
+ atmeth_swap_add(pkt),
+ &res->atomic.orig_val);
+ if (err)
+ return err;
+
+ qp->resp.msn++;
+
+ /* next expected psn, read handles this separately */
+ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ qp->resp.ack_psn = qp->resp.psn;
+
+ qp->resp.opcode = pkt->opcode;
+ qp->resp.status = IB_WC_SUCCESS;
+ }
+
+ return RESPST_ACKNOWLEDGE;
+}
+
+static enum resp_states atomic_write_reply(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ struct resp_res *res = qp->resp.res;
+ struct rxe_mr *mr;
+ u64 value;
+ u64 iova;
+ int err;
+
+ if (!res) {
+ res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
+ qp->resp.res = res;
+ }
+
+ if (res->replay)
+ return RESPST_ACKNOWLEDGE;
+
+ mr = qp->resp.mr;
+ value = *(u64 *)payload_addr(pkt);
+ iova = qp->resp.va + qp->resp.offset;
+
+ /* See IBA oA19-28 */
+ if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
+ rxe_dbg_mr(mr, "mr not in valid state\n");
+ return RESPST_ERR_RKEY_VIOLATION;
+ }
+
+ if (is_odp_mr(mr))
+ err = rxe_odp_do_atomic_write(mr, iova, value);
+ else
+ err = rxe_mr_do_atomic_write(mr, iova, value);
+ if (err)
+ return err;
+
+ qp->resp.resid = 0;
+ qp->resp.msn++;
+
+ /* next expected psn, read handles this separately */
+ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ qp->resp.ack_psn = qp->resp.psn;
+
+ qp->resp.opcode = pkt->opcode;
+ qp->resp.status = IB_WC_SUCCESS;
+
+ return RESPST_ACKNOWLEDGE;
}
static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
- struct rxe_pkt_info *pkt,
struct rxe_pkt_info *ack,
int opcode,
int payload,
u32 psn,
- u8 syndrome,
- u32 *crcp)
+ u8 syndrome)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct sk_buff *skb;
- u32 crc = 0;
- u32 *p;
int paylen;
int pad;
int err;
@@ -649,23 +813,68 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
}
if (ack->mask & RXE_ATMACK_MASK)
- atmack_set_orig(ack, qp->resp.atomic_orig);
+ atmack_set_orig(ack, qp->resp.res->atomic.orig_val);
- err = rxe_prepare(ack, skb, &crc);
+ err = rxe_prepare(&qp->pri_av, ack, skb);
if (err) {
kfree_skb(skb);
return NULL;
}
- if (crcp) {
- /* CRC computation will be continued by the caller */
- *crcp = crc;
- } else {
- p = payload_addr(ack) + payload + bth_pad(ack);
- *p = ~crc;
+ return skb;
+}
+
+/**
+ * rxe_recheck_mr - revalidate MR from rkey and get a reference
+ * @qp: the qp
+ * @rkey: the rkey
+ *
+ * This code allows the MR to be invalidated or deregistered or
+ * the MW if one was used to be invalidated or deallocated.
+ * It is assumed that the access permissions if originally good
+ * are OK and the mappings to be unchanged.
+ *
+ * TODO: If someone reregisters an MR to change its size or
+ * access permissions during the processing of an RDMA read
+ * we should kill the responder resource and complete the
+ * operation with an error.
+ *
+ * Return: mr on success else NULL
+ */
+static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ struct rxe_mr *mr;
+ struct rxe_mw *mw;
+
+ if (rkey_is_mw(rkey)) {
+ mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
+ if (!mw)
+ return NULL;
+
+ mr = mw->mr;
+ if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
+ !mr || mr->state != RXE_MR_STATE_VALID) {
+ rxe_put(mw);
+ return NULL;
+ }
+
+ rxe_get(mr);
+ rxe_put(mw);
+
+ return mr;
}
- return skb;
+ mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
+ if (!mr)
+ return NULL;
+
+ if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) {
+ rxe_put(mr);
+ return NULL;
+ }
+
+ return mr;
}
/* RDMA read response. If res is not NULL, then we have a current RDMA request
@@ -682,55 +891,40 @@ static enum resp_states read_reply(struct rxe_qp *qp,
int opcode;
int err;
struct resp_res *res = qp->resp.res;
- u32 icrc;
- u32 *p;
+ struct rxe_mr *mr;
if (!res) {
- /* This is the first time we process that request. Get a
- * resource
- */
- res = &qp->resp.resources[qp->resp.res_head];
-
- free_rd_atomic_resource(qp, res);
- rxe_advance_resp_resource(qp);
-
- res->type = RXE_READ_MASK;
- res->replay = 0;
-
- res->read.va = qp->resp.va +
- qp->resp.offset;
- res->read.va_org = qp->resp.va +
- qp->resp.offset;
-
- res->first_psn = req_pkt->psn;
+ res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
+ qp->resp.res = res;
+ }
- if (reth_len(req_pkt)) {
- res->last_psn = (req_pkt->psn +
- (reth_len(req_pkt) + mtu - 1) /
- mtu - 1) & BTH_PSN_MASK;
+ if (res->state == rdatm_res_state_new) {
+ if (!res->replay || qp->resp.length == 0) {
+ /* if length == 0 mr will be NULL (is ok)
+ * otherwise qp->resp.mr holds a ref on mr
+ * which we transfer to mr and drop below.
+ */
+ mr = qp->resp.mr;
+ qp->resp.mr = NULL;
} else {
- res->last_psn = res->first_psn;
+ mr = rxe_recheck_mr(qp, res->read.rkey);
+ if (!mr)
+ return RESPST_ERR_RKEY_VIOLATION;
}
- res->cur_psn = req_pkt->psn;
-
- res->read.resid = qp->resp.resid;
- res->read.length = qp->resp.resid;
- res->read.rkey = qp->resp.rkey;
-
- /* note res inherits the reference to mr from qp */
- res->read.mr = qp->resp.mr;
- qp->resp.mr = NULL;
- qp->resp.res = res;
- res->state = rdatm_res_state_new;
- }
-
- if (res->state == rdatm_res_state_new) {
if (res->read.resid <= mtu)
opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
else
opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
} else {
+ /* re-lookup mr from rkey on all later packets.
+ * length will be non-zero. This can fail if someone
+ * modifies or destroys the mr since the first packet.
+ */
+ mr = rxe_recheck_mr(qp, res->read.rkey);
+ if (!mr)
+ return RESPST_ERR_RKEY_VIOLATION;
+
if (res->read.resid > mtu)
opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
else
@@ -741,30 +935,32 @@ static enum resp_states read_reply(struct rxe_qp *qp,
payload = min_t(int, res->read.resid, mtu);
- skb = prepare_ack_packet(qp, req_pkt, &ack_pkt, opcode, payload,
- res->cur_psn, AETH_ACK_UNLIMITED, &icrc);
- if (!skb)
- return RESPST_ERR_RNR;
+ skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
+ res->cur_psn, AETH_ACK_UNLIMITED);
+ if (!skb) {
+ state = RESPST_ERR_RNR;
+ goto err_out;
+ }
- err = rxe_mr_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt),
- payload, RXE_FROM_MR_OBJ, &icrc);
- if (err)
- pr_err("Failed copying memory\n");
+ err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
+ payload, RXE_FROM_MR_OBJ);
+ if (err) {
+ kfree_skb(skb);
+ state = RESPST_ERR_RKEY_VIOLATION;
+ goto err_out;
+ }
if (bth_pad(&ack_pkt)) {
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
u8 *pad = payload_addr(&ack_pkt) + payload;
memset(pad, 0, bth_pad(&ack_pkt));
- icrc = rxe_crc32(rxe, icrc, pad, bth_pad(&ack_pkt));
}
- p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt);
- *p = ~icrc;
+ /* rxe_xmit_packet always consumes the skb */
err = rxe_xmit_packet(qp, &ack_pkt, skb);
if (err) {
- pr_err("Failed sending RDMA reply.\n");
- return RESPST_ERR_RNR;
+ state = RESPST_ERR_RNR;
+ goto err_out;
}
res->read.va += payload;
@@ -782,6 +978,9 @@ static enum resp_states read_reply(struct rxe_qp *qp,
state = RESPST_CLEANUP;
}
+err_out:
+ if (mr)
+ rxe_put(mr);
return state;
}
@@ -804,7 +1003,6 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
if (pkt->mask & RXE_SEND_MASK) {
if (qp_type(qp) == IB_QPT_UD ||
- qp_type(qp) == IB_QPT_SMI ||
qp_type(qp) == IB_QPT_GSI) {
if (skb->protocol == htons(ETH_P_IP)) {
memset(&hdr.reserved, 0,
@@ -831,9 +1029,11 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
qp->resp.msn++;
return RESPST_READ_REPLY;
} else if (pkt->mask & RXE_ATOMIC_MASK) {
- err = process_atomic(qp, pkt);
- if (err)
- return err;
+ return RESPST_ATOMIC_REPLY;
+ } else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
+ return RESPST_ATOMIC_WRITE_REPLY;
+ } else if (pkt->mask & RXE_FLUSH_MASK) {
+ return RESPST_PROCESS_FLUSH;
} else {
/* Unreachable */
WARN_ON_ONCE(1);
@@ -847,6 +1047,10 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
return RESPST_ERR_INVALIDATE_RKEY;
}
+ if (pkt->mask & RXE_END_MASK)
+ /* We successfully processed this new request. */
+ qp->resp.msn++;
+
/* next expected psn, read handles this separately */
qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
qp->resp.ack_psn = qp->resp.psn;
@@ -854,11 +1058,9 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
qp->resp.opcode = pkt->opcode;
qp->resp.status = IB_WC_SUCCESS;
- if (pkt->mask & RXE_COMP_MASK) {
- /* We successfully processed this new request. */
- qp->resp.msn++;
+ if (pkt->mask & RXE_COMP_MASK)
return RESPST_COMPLETE;
- } else if (qp_type(qp) == IB_QPT_RC)
+ else if (qp_type(qp) == IB_QPT_RC)
return RESPST_ACKNOWLEDGE;
else
return RESPST_CLEANUP;
@@ -872,6 +1074,7 @@ static enum resp_states do_complete(struct rxe_qp *qp,
struct ib_uverbs_wc *uwc = &cqe.uibwc;
struct rxe_recv_wqe *wqe = qp->resp.wqe;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ unsigned long flags;
if (!wqe)
goto finish;
@@ -893,7 +1096,6 @@ static enum resp_states do_complete(struct rxe_qp *qp,
wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
pkt->mask & RXE_WRITE_MASK) ?
IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
- wc->vendor_err = 0;
wc->byte_len = (pkt->mask & RXE_IMMDT_MASK &&
pkt->mask & RXE_WRITE_MASK) ?
qp->resp.length : wqe->dma.length - wqe->dma.resid;
@@ -914,8 +1116,6 @@ static enum resp_states do_complete(struct rxe_qp *qp,
uwc->ex.invalidate_rkey = ieth_rkey(pkt);
}
- uwc->qp_num = qp->ibqp.qp_num;
-
if (pkt->mask & RXE_DETH_MASK)
uwc->src_qp = deth_sqp(pkt);
@@ -947,18 +1147,17 @@ static enum resp_states do_complete(struct rxe_qp *qp,
if (pkt->mask & RXE_DETH_MASK)
wc->src_qp = deth_sqp(pkt);
- wc->qp = &qp->ibqp;
wc->port_num = qp->attr.port_num;
}
+ } else {
+ if (wc->status != IB_WC_WR_FLUSH_ERR)
+ rxe_err_qp(qp, "non-flush error status = %d\n",
+ wc->status);
}
/* have copy for srq and reference for !srq */
- if (!qp->srq) {
- if (qp->is_user)
- advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_USER);
- else
- advance_consumer(qp->rq.queue, QUEUE_TYPE_KERNEL);
- }
+ if (!qp->srq)
+ queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT);
qp->resp.wqe = NULL;
@@ -966,8 +1165,13 @@ static enum resp_states do_complete(struct rxe_qp *qp,
return RESPST_ERR_CQ_OVERFLOW;
finish:
- if (unlikely(qp->resp.state == QP_STATE_ERROR))
+ spin_lock_irqsave(&qp->state_lock, flags);
+ if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return RESPST_CHK_RESOURCE;
+ }
+ spin_unlock_irqrestore(&qp->state_lock, flags);
+
if (unlikely(!pkt))
return RESPST_DONE;
if (qp_type(qp) == IB_QPT_RC)
@@ -976,62 +1180,54 @@ finish:
return RESPST_CLEANUP;
}
-static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
- u8 syndrome, u32 psn)
+
+static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
+ int opcode, const char *msg)
{
- int err = 0;
+ int err;
struct rxe_pkt_info ack_pkt;
struct sk_buff *skb;
- skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE,
- 0, psn, syndrome, NULL);
- if (!skb) {
- err = -ENOMEM;
- goto err1;
- }
+ skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome);
+ if (!skb)
+ return -ENOMEM;
err = rxe_xmit_packet(qp, &ack_pkt, skb);
if (err)
- pr_err_ratelimited("Failed sending ack\n");
+ rxe_dbg_qp(qp, "Failed sending %s\n", msg);
-err1:
return err;
}
-static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
- u8 syndrome)
+static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
- int rc = 0;
- struct rxe_pkt_info ack_pkt;
- struct sk_buff *skb;
- struct resp_res *res;
+ return send_common_ack(qp, syndrome, psn,
+ IB_OPCODE_RC_ACKNOWLEDGE, "ACK");
+}
- skb = prepare_ack_packet(qp, pkt, &ack_pkt,
- IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, 0, pkt->psn,
- syndrome, NULL);
- if (!skb) {
- rc = -ENOMEM;
- goto out;
- }
+static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
+{
+ int ret = send_common_ack(qp, syndrome, psn,
+ IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, "ATOMIC ACK");
- res = &qp->resp.resources[qp->resp.res_head];
- free_rd_atomic_resource(qp, res);
- rxe_advance_resp_resource(qp);
+ /* have to clear this since it is used to trigger
+ * long read replies
+ */
+ qp->resp.res = NULL;
+ return ret;
+}
- skb_get(skb);
- res->type = RXE_ATOMIC_MASK;
- res->atomic.skb = skb;
- res->first_psn = ack_pkt.psn;
- res->last_psn = ack_pkt.psn;
- res->cur_psn = ack_pkt.psn;
-
- rc = rxe_xmit_packet(qp, &ack_pkt, skb);
- if (rc) {
- pr_err_ratelimited("Failed sending ack\n");
- rxe_drop_ref(qp);
- }
-out:
- return rc;
+static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
+{
+ int ret = send_common_ack(qp, syndrome, psn,
+ IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY,
+ "RDMA READ response of length zero ACK");
+
+ /* have to clear this since it is used to trigger
+ * long read replies
+ */
+ qp->resp.res = NULL;
+ return ret;
}
static enum resp_states acknowledge(struct rxe_qp *qp,
@@ -1041,11 +1237,13 @@ static enum resp_states acknowledge(struct rxe_qp *qp,
return RESPST_CLEANUP;
if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
- send_ack(qp, pkt, qp->resp.aeth_syndrome, pkt->psn);
+ send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
else if (pkt->mask & RXE_ATOMIC_MASK)
- send_atomic_ack(qp, pkt, AETH_ACK_UNLIMITED);
+ send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
+ else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK))
+ send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
else if (bth_ack(pkt))
- send_ack(qp, pkt, AETH_ACK_UNLIMITED, pkt->psn);
+ send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
return RESPST_CLEANUP;
}
@@ -1057,13 +1255,13 @@ static enum resp_states cleanup(struct rxe_qp *qp,
if (pkt) {
skb = skb_dequeue(&qp->req_pkts);
- rxe_drop_ref(qp);
+ rxe_put(qp);
kfree_skb(skb);
ib_device_put(qp->ibqp.device);
}
if (qp->resp.mr) {
- rxe_drop_ref(qp->resp.mr);
+ rxe_put(qp->resp.mr);
qp->resp.mr = NULL;
}
@@ -1098,8 +1296,24 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
if (pkt->mask & RXE_SEND_MASK ||
pkt->mask & RXE_WRITE_MASK) {
/* SEND. Ack again and cleanup. C9-105. */
- send_ack(qp, pkt, AETH_ACK_UNLIMITED, prev_psn);
+ send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
return RESPST_CLEANUP;
+ } else if (pkt->mask & RXE_FLUSH_MASK) {
+ struct resp_res *res;
+
+ /* Find the operation in our list of responder resources. */
+ res = find_resource(qp, pkt->psn);
+ if (res) {
+ res->replay = 1;
+ res->cur_psn = pkt->psn;
+ qp->resp.res = res;
+ rc = RESPST_PROCESS_FLUSH;
+ goto out;
+ }
+
+ /* Resource not found. Class D error. Drop the request. */
+ rc = RESPST_CLEANUP;
+ goto out;
} else if (pkt->mask & RXE_READ_MASK) {
struct resp_res *res;
@@ -1152,14 +1366,13 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
/* Find the operation in our list of responder resources. */
res = find_resource(qp, pkt->psn);
if (res) {
- skb_get(res->atomic.skb);
- /* Resend the result. */
- rc = rxe_xmit_packet(qp, pkt, res->atomic.skb);
- if (rc) {
- pr_err("Failed resending result. This flow is not handled - skb ignored\n");
- rc = RESPST_CLEANUP;
- goto out;
- }
+ res->replay = 1;
+ res->cur_psn = pkt->psn;
+ qp->resp.res = res;
+ rc = pkt->mask & RXE_ATOMIC_MASK ?
+ RESPST_ATOMIC_REPLY :
+ RESPST_ATOMIC_WRITE_REPLY;
+ goto out;
}
/* Resource not found. Class D error. Drop the request. */
@@ -1207,7 +1420,7 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
}
if (qp->resp.mr) {
- rxe_drop_ref(qp->resp.mr);
+ rxe_put(qp->resp.mr);
qp->resp.mr = NULL;
}
@@ -1215,54 +1428,107 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
}
}
-static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
+/* drain incoming request packet queue */
+static void drain_req_pkts(struct rxe_qp *qp)
{
struct sk_buff *skb;
- struct rxe_queue *q = qp->rq.queue;
while ((skb = skb_dequeue(&qp->req_pkts))) {
- rxe_drop_ref(qp);
+ rxe_put(qp);
kfree_skb(skb);
ib_device_put(qp->ibqp.device);
}
+}
- if (notify)
- return;
+/* complete receive wqe with flush error */
+static int flush_recv_wqe(struct rxe_qp *qp, struct rxe_recv_wqe *wqe)
+{
+ struct rxe_cqe cqe = {};
+ struct ib_wc *wc = &cqe.ibwc;
+ struct ib_uverbs_wc *uwc = &cqe.uibwc;
+ int err;
- while (!qp->srq && q && queue_head(q, q->type))
- advance_consumer(q, q->type);
+ if (qp->rcq->is_user) {
+ uwc->wr_id = wqe->wr_id;
+ uwc->status = IB_WC_WR_FLUSH_ERR;
+ uwc->qp_num = qp_num(qp);
+ } else {
+ wc->wr_id = wqe->wr_id;
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ wc->qp = &qp->ibqp;
+ }
+
+ err = rxe_cq_post(qp->rcq, &cqe, 0);
+ if (err)
+ rxe_dbg_cq(qp->rcq, "post cq failed err = %d\n", err);
+
+ return err;
}
-int rxe_responder(void *arg)
+/* drain and optionally complete the receive queue;
+ * if unable to complete a wqe, stop completing and
+ * just flush the remaining wqes
+ */
+static void flush_recv_queue(struct rxe_qp *qp, bool notify)
{
- struct rxe_qp *qp = (struct rxe_qp *)arg;
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
- enum resp_states state;
- struct rxe_pkt_info *pkt = NULL;
- int ret = 0;
+ struct rxe_queue *q = qp->rq.queue;
+ struct rxe_recv_wqe *wqe;
+ int err;
- rxe_add_ref(qp);
+ if (qp->srq) {
+ if (notify && qp->ibqp.event_handler) {
+ struct ib_event ev;
- qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+ return;
+ }
- if (!qp->valid) {
- ret = -EINVAL;
- goto done;
+ /* recv queue not created. nothing to do. */
+ if (!qp->rq.queue)
+ return;
+
+ while ((wqe = queue_head(q, q->type))) {
+ if (notify) {
+ err = flush_recv_wqe(qp, wqe);
+ if (err)
+ notify = 0;
+ }
+ queue_advance_consumer(q, q->type);
}
- switch (qp->resp.state) {
- case QP_STATE_RESET:
- state = RESPST_RESET;
- break;
+ qp->resp.wqe = NULL;
+}
- default:
- state = RESPST_GET_REQ;
- break;
+int rxe_receiver(struct rxe_qp *qp)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ enum resp_states state;
+ struct rxe_pkt_info *pkt = NULL;
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->state_lock, flags);
+ if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
+ qp_state(qp) == IB_QPS_RESET) {
+ bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);
+
+ drain_req_pkts(qp);
+ flush_recv_queue(qp, notify);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
+ goto exit;
}
+ spin_unlock_irqrestore(&qp->state_lock, flags);
+
+ qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
+
+ state = RESPST_GET_REQ;
while (1) {
- pr_debug("qp#%d state = %s\n", qp_num(qp),
- resp_state_name[state]);
+ rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
switch (state) {
case RESPST_GET_REQ:
state = get_req(qp, &pkt);
@@ -1280,7 +1546,7 @@ int rxe_responder(void *arg)
state = check_resource(qp, pkt);
break;
case RESPST_CHK_LENGTH:
- state = check_length(qp, pkt);
+ state = rxe_resp_check_length(qp, pkt);
break;
case RESPST_CHK_RKEY:
state = check_rkey(qp, pkt);
@@ -1294,6 +1560,15 @@ int rxe_responder(void *arg)
case RESPST_READ_REPLY:
state = read_reply(qp, pkt);
break;
+ case RESPST_ATOMIC_REPLY:
+ state = atomic_reply(qp, pkt);
+ break;
+ case RESPST_ATOMIC_WRITE_REPLY:
+ state = atomic_write_reply(qp, pkt);
+ break;
+ case RESPST_PROCESS_FLUSH:
+ state = process_flush(qp, pkt);
+ break;
case RESPST_ACKNOWLEDGE:
state = acknowledge(qp, pkt);
break;
@@ -1305,7 +1580,7 @@ int rxe_responder(void *arg)
break;
case RESPST_ERR_PSN_OUT_OF_SEQ:
/* RC only - Class B. Drop packet. */
- send_ack(qp, pkt, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
+ send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
state = RESPST_CLEANUP;
break;
@@ -1327,7 +1602,7 @@ int rxe_responder(void *arg)
if (qp_type(qp) == IB_QPT_RC) {
rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
/* RC - class B */
- send_ack(qp, pkt, AETH_RNR_NAK |
+ send_ack(qp, AETH_RNR_NAK |
(~AETH_TYPE_MASK &
qp->attr.min_rnr_timer),
pkt->psn);
@@ -1409,14 +1684,9 @@ int rxe_responder(void *arg)
goto exit;
- case RESPST_RESET:
- rxe_drain_req_pkts(qp, false);
- qp->resp.wqe = NULL;
- goto exit;
-
case RESPST_ERROR:
qp->resp.goto_error = 0;
- pr_warn("qp#%d moved to error state\n", qp_num(qp));
+ rxe_dbg_qp(qp, "moved to error state\n");
rxe_qp_error(qp);
goto exit;
@@ -1425,9 +1695,15 @@ int rxe_responder(void *arg)
}
}
+ /* A non-zero return value will cause rxe_do_task to
+ * exit its loop and end the work item. A zero return
+ * will continue looping and return to rxe_receiver
+ */
+done:
+ ret = 0;
+ goto out;
exit:
ret = -EAGAIN;
-done:
- rxe_drop_ref(qp);
+out:
return ret;
}