summaryrefslogtreecommitdiff
path: root/drivers/infiniband/sw/siw/siw_qp.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/sw/siw/siw_qp.c')
-rw-r--r--drivers/infiniband/sw/siw/siw_qp.c336
1 files changed, 159 insertions, 177 deletions
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index 875d36d4b1c6..c1e6e7d6e32f 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */
@@ -10,6 +10,7 @@
#include <linux/llist.h>
#include <asm/barrier.h>
#include <net/tcp.h>
+#include <trace/events/sock.h>
#include "siw.h"
#include "siw_verbs.h"
@@ -94,6 +95,8 @@ void siw_qp_llp_data_ready(struct sock *sk)
{
struct siw_qp *qp;
+ trace_sk_data_ready(sk);
+
read_lock(&sk->sk_callback_lock);
if (unlikely(!sk->sk_user_data || !sk_to_qp(sk)))
@@ -199,57 +202,30 @@ void siw_qp_llp_write_space(struct sock *sk)
static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
{
- irq_size = roundup_pow_of_two(irq_size);
- orq_size = roundup_pow_of_two(orq_size);
-
- qp->attrs.irq_size = irq_size;
- qp->attrs.orq_size = orq_size;
-
- qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe));
- if (!qp->irq) {
- siw_dbg_qp(qp, "irq malloc for %d failed\n", irq_size);
- qp->attrs.irq_size = 0;
- return -ENOMEM;
+ if (irq_size) {
+ irq_size = roundup_pow_of_two(irq_size);
+ qp->irq = vcalloc(irq_size, sizeof(struct siw_sqe));
+ if (!qp->irq) {
+ qp->attrs.irq_size = 0;
+ return -ENOMEM;
+ }
}
- qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe));
- if (!qp->orq) {
- siw_dbg_qp(qp, "orq malloc for %d failed\n", orq_size);
- qp->attrs.orq_size = 0;
- qp->attrs.irq_size = 0;
- vfree(qp->irq);
- return -ENOMEM;
+ if (orq_size) {
+ orq_size = roundup_pow_of_two(orq_size);
+ qp->orq = vcalloc(orq_size, sizeof(struct siw_sqe));
+ if (!qp->orq) {
+ qp->attrs.orq_size = 0;
+ qp->attrs.irq_size = 0;
+ vfree(qp->irq);
+ return -ENOMEM;
+ }
}
+ qp->attrs.irq_size = irq_size;
+ qp->attrs.orq_size = orq_size;
siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size);
return 0;
}
-static int siw_qp_enable_crc(struct siw_qp *qp)
-{
- struct siw_rx_stream *c_rx = &qp->rx_stream;
- struct siw_iwarp_tx *c_tx = &qp->tx_ctx;
- int size;
-
- if (siw_crypto_shash == NULL)
- return -ENOENT;
-
- size = crypto_shash_descsize(siw_crypto_shash) +
- sizeof(struct shash_desc);
-
- c_tx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
- c_rx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
- if (!c_tx->mpa_crc_hd || !c_rx->mpa_crc_hd) {
- kfree(c_tx->mpa_crc_hd);
- kfree(c_rx->mpa_crc_hd);
- c_tx->mpa_crc_hd = NULL;
- c_rx->mpa_crc_hd = NULL;
- return -ENOMEM;
- }
- c_tx->mpa_crc_hd->tfm = siw_crypto_shash;
- c_rx->mpa_crc_hd->tfm = siw_crypto_shash;
-
- return 0;
-}
-
/*
* Send a non signalled READ or WRITE to peer side as negotiated
* with MPAv2 P2P setup protocol. The work request is only created
@@ -288,13 +264,14 @@ int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl)
if (ctrl & MPA_V2_RDMA_WRITE_RTR)
wqe->sqe.opcode = SIW_OP_WRITE;
else if (ctrl & MPA_V2_RDMA_READ_RTR) {
- struct siw_sqe *rreq;
+ struct siw_sqe *rreq = NULL;
wqe->sqe.opcode = SIW_OP_READ;
spin_lock(&qp->orq_lock);
- rreq = orq_get_free(qp);
+ if (qp->attrs.orq_size)
+ rreq = orq_get_free(qp);
if (rreq) {
siw_read_to_orq(rreq, &wqe->sqe);
qp->orq_put++;
@@ -579,20 +556,15 @@ void siw_send_terminate(struct siw_qp *qp)
term->ctrl.mpa_len =
cpu_to_be16(len_terminate - (MPA_HDR_SIZE + MPA_CRC_SIZE));
- if (qp->tx_ctx.mpa_crc_hd) {
- crypto_shash_init(qp->tx_ctx.mpa_crc_hd);
- if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
- (u8 *)iov[0].iov_base,
- iov[0].iov_len))
- goto out;
-
+ if (qp->tx_ctx.mpa_crc_enabled) {
+ siw_crc_init(&qp->tx_ctx.mpa_crc);
+ siw_crc_update(&qp->tx_ctx.mpa_crc,
+ iov[0].iov_base, iov[0].iov_len);
if (num_frags == 3) {
- if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
- (u8 *)iov[1].iov_base,
- iov[1].iov_len))
- goto out;
+ siw_crc_update(&qp->tx_ctx.mpa_crc,
+ iov[1].iov_base, iov[1].iov_len);
}
- crypto_shash_final(qp->tx_ctx.mpa_crc_hd, (u8 *)&crc);
+ siw_crc_final(&qp->tx_ctx.mpa_crc, (u8 *)&crc);
}
rv = kernel_sendmsg(s, &msg, iov, num_frags, len_terminate);
@@ -600,7 +572,6 @@ void siw_send_terminate(struct siw_qp *qp)
rv == len_terminate ? "success" : "failure",
__rdmap_term_layer(term), __rdmap_term_etype(term),
__rdmap_term_ecode(term), rv);
-out:
kfree(term);
kfree(err_hdr);
}
@@ -639,9 +610,10 @@ static int siw_qp_nextstate_from_idle(struct siw_qp *qp,
switch (attrs->state) {
case SIW_QP_STATE_RTS:
if (attrs->flags & SIW_MPA_CRC) {
- rv = siw_qp_enable_crc(qp);
- if (rv)
- break;
+ siw_crc_init(&qp->tx_ctx.mpa_crc);
+ qp->tx_ctx.mpa_crc_enabled = true;
+ siw_crc_init(&qp->rx_stream.mpa_crc);
+ qp->rx_stream.mpa_crc_enabled = true;
}
if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) {
siw_dbg_qp(qp, "no socket\n");
@@ -877,135 +849,88 @@ void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe)
rreq->num_sge = 1;
}
-/*
- * Must be called with SQ locked.
- * To avoid complete SQ starvation by constant inbound READ requests,
- * the active IRQ will not be served after qp->irq_burst, if the
- * SQ has pending work.
- */
-int siw_activate_tx(struct siw_qp *qp)
+static int siw_activate_tx_from_sq(struct siw_qp *qp)
{
- struct siw_sqe *irqe, *sqe;
+ struct siw_sqe *sqe;
struct siw_wqe *wqe = tx_wqe(qp);
int rv = 1;
- irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size];
-
- if (irqe->flags & SIW_WQE_VALID) {
- sqe = sq_get_next(qp);
-
- /*
- * Avoid local WQE processing starvation in case
- * of constant inbound READ request stream
- */
- if (sqe && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) {
- qp->irq_burst = 0;
- goto skip_irq;
- }
- memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
- wqe->wr_status = SIW_WR_QUEUED;
-
- /* start READ RESPONSE */
- wqe->sqe.opcode = SIW_OP_READ_RESPONSE;
- wqe->sqe.flags = 0;
- if (irqe->num_sge) {
- wqe->sqe.num_sge = 1;
- wqe->sqe.sge[0].length = irqe->sge[0].length;
- wqe->sqe.sge[0].laddr = irqe->sge[0].laddr;
- wqe->sqe.sge[0].lkey = irqe->sge[0].lkey;
- } else {
- wqe->sqe.num_sge = 0;
- }
-
- /* Retain original RREQ's message sequence number for
- * potential error reporting cases.
- */
- wqe->sqe.sge[1].length = irqe->sge[1].length;
-
- wqe->sqe.rkey = irqe->rkey;
- wqe->sqe.raddr = irqe->raddr;
+ sqe = sq_get_next(qp);
+ if (!sqe)
+ return 0;
- wqe->processed = 0;
- qp->irq_get++;
+ memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
+ wqe->wr_status = SIW_WR_QUEUED;
- /* mark current IRQ entry free */
- smp_store_mb(irqe->flags, 0);
+ /* First copy SQE to kernel private memory */
+ memcpy(&wqe->sqe, sqe, sizeof(*sqe));
+ if (wqe->sqe.opcode >= SIW_NUM_OPCODES) {
+ rv = -EINVAL;
goto out;
}
- sqe = sq_get_next(qp);
- if (sqe) {
-skip_irq:
- memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
- wqe->wr_status = SIW_WR_QUEUED;
-
- /* First copy SQE to kernel private memory */
- memcpy(&wqe->sqe, sqe, sizeof(*sqe));
-
- if (wqe->sqe.opcode >= SIW_NUM_OPCODES) {
+ if (wqe->sqe.flags & SIW_WQE_INLINE) {
+ if (wqe->sqe.opcode != SIW_OP_SEND &&
+ wqe->sqe.opcode != SIW_OP_WRITE) {
rv = -EINVAL;
goto out;
}
- if (wqe->sqe.flags & SIW_WQE_INLINE) {
- if (wqe->sqe.opcode != SIW_OP_SEND &&
- wqe->sqe.opcode != SIW_OP_WRITE) {
- rv = -EINVAL;
- goto out;
- }
- if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) {
- rv = -EINVAL;
- goto out;
- }
- wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1];
- wqe->sqe.sge[0].lkey = 0;
- wqe->sqe.num_sge = 1;
+ if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) {
+ rv = -EINVAL;
+ goto out;
}
- if (wqe->sqe.flags & SIW_WQE_READ_FENCE) {
- /* A READ cannot be fenced */
- if (unlikely(wqe->sqe.opcode == SIW_OP_READ ||
- wqe->sqe.opcode ==
- SIW_OP_READ_LOCAL_INV)) {
- siw_dbg_qp(qp, "cannot fence read\n");
- rv = -EINVAL;
- goto out;
- }
- spin_lock(&qp->orq_lock);
+ wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1];
+ wqe->sqe.sge[0].lkey = 0;
+ wqe->sqe.num_sge = 1;
+ }
+ if (wqe->sqe.flags & SIW_WQE_READ_FENCE) {
+ /* A READ cannot be fenced */
+ if (unlikely(wqe->sqe.opcode == SIW_OP_READ ||
+ wqe->sqe.opcode ==
+ SIW_OP_READ_LOCAL_INV)) {
+ siw_dbg_qp(qp, "cannot fence read\n");
+ rv = -EINVAL;
+ goto out;
+ }
+ spin_lock(&qp->orq_lock);
- if (!siw_orq_empty(qp)) {
- qp->tx_ctx.orq_fence = 1;
- rv = 0;
- }
- spin_unlock(&qp->orq_lock);
+ if (qp->attrs.orq_size && !siw_orq_empty(qp)) {
+ qp->tx_ctx.orq_fence = 1;
+ rv = 0;
+ }
+ spin_unlock(&qp->orq_lock);
- } else if (wqe->sqe.opcode == SIW_OP_READ ||
- wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
- struct siw_sqe *rreq;
+ } else if (wqe->sqe.opcode == SIW_OP_READ ||
+ wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
+ struct siw_sqe *rreq;
- wqe->sqe.num_sge = 1;
+ if (unlikely(!qp->attrs.orq_size)) {
+ /* We negotiated not to send READ req's */
+ rv = -EINVAL;
+ goto out;
+ }
+ wqe->sqe.num_sge = 1;
- spin_lock(&qp->orq_lock);
+ spin_lock(&qp->orq_lock);
- rreq = orq_get_free(qp);
- if (rreq) {
- /*
- * Make an immediate copy in ORQ to be ready
- * to process loopback READ reply
- */
- siw_read_to_orq(rreq, &wqe->sqe);
- qp->orq_put++;
- } else {
- qp->tx_ctx.orq_fence = 1;
- rv = 0;
- }
- spin_unlock(&qp->orq_lock);
+ rreq = orq_get_free(qp);
+ if (rreq) {
+ /*
+ * Make an immediate copy in ORQ to be ready
+ * to process loopback READ reply
+ */
+ siw_read_to_orq(rreq, &wqe->sqe);
+ qp->orq_put++;
+ } else {
+ qp->tx_ctx.orq_fence = 1;
+ rv = 0;
}
-
- /* Clear SQE, can be re-used by application */
- smp_store_mb(sqe->flags, 0);
- qp->sq_get++;
- } else {
- rv = 0;
+ spin_unlock(&qp->orq_lock);
}
+
+ /* Clear SQE, can be re-used by application */
+ smp_store_mb(sqe->flags, 0);
+ qp->sq_get++;
out:
if (unlikely(rv < 0)) {
siw_dbg_qp(qp, "error %d\n", rv);
@@ -1015,6 +940,65 @@ out:
}
/*
+ * Must be called with SQ locked.
+ * To avoid complete SQ starvation by constant inbound READ requests,
+ * the active IRQ will not be served after qp->irq_burst, if the
+ * SQ has pending work.
+ */
+int siw_activate_tx(struct siw_qp *qp)
+{
+ struct siw_sqe *irqe;
+ struct siw_wqe *wqe = tx_wqe(qp);
+
+ if (!qp->attrs.irq_size)
+ return siw_activate_tx_from_sq(qp);
+
+ irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size];
+
+ if (!(irqe->flags & SIW_WQE_VALID))
+ return siw_activate_tx_from_sq(qp);
+
+ /*
+ * Avoid local WQE processing starvation in case
+ * of constant inbound READ request stream
+ */
+ if (sq_get_next(qp) && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) {
+ qp->irq_burst = 0;
+ return siw_activate_tx_from_sq(qp);
+ }
+ memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
+ wqe->wr_status = SIW_WR_QUEUED;
+
+ /* start READ RESPONSE */
+ wqe->sqe.opcode = SIW_OP_READ_RESPONSE;
+ wqe->sqe.flags = 0;
+ if (irqe->num_sge) {
+ wqe->sqe.num_sge = 1;
+ wqe->sqe.sge[0].length = irqe->sge[0].length;
+ wqe->sqe.sge[0].laddr = irqe->sge[0].laddr;
+ wqe->sqe.sge[0].lkey = irqe->sge[0].lkey;
+ } else {
+ wqe->sqe.num_sge = 0;
+ }
+
+ /* Retain original RREQ's message sequence number for
+ * potential error reporting cases.
+ */
+ wqe->sqe.sge[1].length = irqe->sge[1].length;
+
+ wqe->sqe.rkey = irqe->rkey;
+ wqe->sqe.raddr = irqe->raddr;
+
+ wqe->processed = 0;
+ qp->irq_get++;
+
+ /* mark current IRQ entry free */
+ smp_store_mb(irqe->flags, 0);
+
+ return 1;
+}
+
+/*
* Check if current CQ state qualifies for calling CQ completion
* handler. Must be called with CQ lock held.
*/
@@ -1167,7 +1151,7 @@ int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes,
/*
* siw_sq_flush()
*
- * Flush SQ and ORRQ entries to CQ.
+ * Flush SQ and ORQ entries to CQ.
*
* Must be called with QP state write lock held.
* Therefore, SQ and ORQ lock must not be taken.
@@ -1329,8 +1313,6 @@ void siw_free_qp(struct kref *ref)
vfree(qp->orq);
siw_put_tx_cpu(qp->tx_cpu);
-
+ complete(&qp->qp_free);
atomic_dec(&sdev->num_qp);
- siw_dbg_qp(qp, "free QP\n");
- kfree_rcu(qp, rcu);
}