diff options
Diffstat (limited to 'drivers/infiniband/sw/siw/siw_qp.c')
| -rw-r--r-- | drivers/infiniband/sw/siw/siw_qp.c | 336 |
1 files changed, 159 insertions, 177 deletions
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 875d36d4b1c6..c1e6e7d6e32f 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ /* Copyright (c) 2008-2019, IBM Corporation */ @@ -10,6 +10,7 @@ #include <linux/llist.h> #include <asm/barrier.h> #include <net/tcp.h> +#include <trace/events/sock.h> #include "siw.h" #include "siw_verbs.h" @@ -94,6 +95,8 @@ void siw_qp_llp_data_ready(struct sock *sk) { struct siw_qp *qp; + trace_sk_data_ready(sk); + read_lock(&sk->sk_callback_lock); if (unlikely(!sk->sk_user_data || !sk_to_qp(sk))) @@ -199,57 +202,30 @@ void siw_qp_llp_write_space(struct sock *sk) static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) { - irq_size = roundup_pow_of_two(irq_size); - orq_size = roundup_pow_of_two(orq_size); - - qp->attrs.irq_size = irq_size; - qp->attrs.orq_size = orq_size; - - qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe)); - if (!qp->irq) { - siw_dbg_qp(qp, "irq malloc for %d failed\n", irq_size); - qp->attrs.irq_size = 0; - return -ENOMEM; + if (irq_size) { + irq_size = roundup_pow_of_two(irq_size); + qp->irq = vcalloc(irq_size, sizeof(struct siw_sqe)); + if (!qp->irq) { + qp->attrs.irq_size = 0; + return -ENOMEM; + } } - qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe)); - if (!qp->orq) { - siw_dbg_qp(qp, "orq malloc for %d failed\n", orq_size); - qp->attrs.orq_size = 0; - qp->attrs.irq_size = 0; - vfree(qp->irq); - return -ENOMEM; + if (orq_size) { + orq_size = roundup_pow_of_two(orq_size); + qp->orq = vcalloc(orq_size, sizeof(struct siw_sqe)); + if (!qp->orq) { + qp->attrs.orq_size = 0; + qp->attrs.irq_size = 0; + vfree(qp->irq); + return -ENOMEM; + } } + qp->attrs.irq_size = irq_size; + qp->attrs.orq_size = orq_size; siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size); return 0; } -static int siw_qp_enable_crc(struct siw_qp *qp) -{ - struct siw_rx_stream *c_rx = &qp->rx_stream; - struct siw_iwarp_tx *c_tx = &qp->tx_ctx; - int size; - - if (siw_crypto_shash == NULL) - return -ENOENT; - - size = crypto_shash_descsize(siw_crypto_shash) + - sizeof(struct shash_desc); - - c_tx->mpa_crc_hd = kzalloc(size, GFP_KERNEL); - c_rx->mpa_crc_hd = kzalloc(size, GFP_KERNEL); - if (!c_tx->mpa_crc_hd || !c_rx->mpa_crc_hd) { - kfree(c_tx->mpa_crc_hd); - kfree(c_rx->mpa_crc_hd); - c_tx->mpa_crc_hd = NULL; - c_rx->mpa_crc_hd = NULL; - return -ENOMEM; - } - c_tx->mpa_crc_hd->tfm = siw_crypto_shash; - c_rx->mpa_crc_hd->tfm = siw_crypto_shash; - - return 0; -} - /* * Send a non signalled READ or WRITE to peer side as negotiated * with MPAv2 P2P setup protocol. The work request is only created @@ -288,13 +264,14 @@ int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl) if (ctrl & MPA_V2_RDMA_WRITE_RTR) wqe->sqe.opcode = SIW_OP_WRITE; else if (ctrl & MPA_V2_RDMA_READ_RTR) { - struct siw_sqe *rreq; + struct siw_sqe *rreq = NULL; wqe->sqe.opcode = SIW_OP_READ; spin_lock(&qp->orq_lock); - rreq = orq_get_free(qp); + if (qp->attrs.orq_size) + rreq = orq_get_free(qp); if (rreq) { siw_read_to_orq(rreq, &wqe->sqe); qp->orq_put++; @@ -579,20 +556,15 @@ void siw_send_terminate(struct siw_qp *qp) term->ctrl.mpa_len = cpu_to_be16(len_terminate - (MPA_HDR_SIZE + MPA_CRC_SIZE)); - if (qp->tx_ctx.mpa_crc_hd) { - crypto_shash_init(qp->tx_ctx.mpa_crc_hd); - if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd, - (u8 *)iov[0].iov_base, - iov[0].iov_len)) - goto out; - + if (qp->tx_ctx.mpa_crc_enabled) { + siw_crc_init(&qp->tx_ctx.mpa_crc); + siw_crc_update(&qp->tx_ctx.mpa_crc, + iov[0].iov_base, iov[0].iov_len); if (num_frags == 3) { - if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd, - (u8 *)iov[1].iov_base, - iov[1].iov_len)) - goto out; + siw_crc_update(&qp->tx_ctx.mpa_crc, + iov[1].iov_base, iov[1].iov_len); } - crypto_shash_final(qp->tx_ctx.mpa_crc_hd, (u8 *)&crc); + siw_crc_final(&qp->tx_ctx.mpa_crc, (u8 *)&crc); } rv = kernel_sendmsg(s, &msg, iov, num_frags, len_terminate); @@ -600,7 +572,6 @@ void siw_send_terminate(struct siw_qp *qp) rv == len_terminate ? "success" : "failure", __rdmap_term_layer(term), __rdmap_term_etype(term), __rdmap_term_ecode(term), rv); -out: kfree(term); kfree(err_hdr); } @@ -639,9 +610,10 @@ static int siw_qp_nextstate_from_idle(struct siw_qp *qp, switch (attrs->state) { case SIW_QP_STATE_RTS: if (attrs->flags & SIW_MPA_CRC) { - rv = siw_qp_enable_crc(qp); - if (rv) - break; + siw_crc_init(&qp->tx_ctx.mpa_crc); + qp->tx_ctx.mpa_crc_enabled = true; + siw_crc_init(&qp->rx_stream.mpa_crc); + qp->rx_stream.mpa_crc_enabled = true; } if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) { siw_dbg_qp(qp, "no socket\n"); @@ -877,135 +849,88 @@ void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe) rreq->num_sge = 1; } -/* - * Must be called with SQ locked. - * To avoid complete SQ starvation by constant inbound READ requests, - * the active IRQ will not be served after qp->irq_burst, if the - * SQ has pending work. - */ -int siw_activate_tx(struct siw_qp *qp) +static int siw_activate_tx_from_sq(struct siw_qp *qp) { - struct siw_sqe *irqe, *sqe; + struct siw_sqe *sqe; struct siw_wqe *wqe = tx_wqe(qp); int rv = 1; - irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size]; - - if (irqe->flags & SIW_WQE_VALID) { - sqe = sq_get_next(qp); - - /* - * Avoid local WQE processing starvation in case - * of constant inbound READ request stream - */ - if (sqe && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) { - qp->irq_burst = 0; - goto skip_irq; - } - memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); - wqe->wr_status = SIW_WR_QUEUED; - - /* start READ RESPONSE */ - wqe->sqe.opcode = SIW_OP_READ_RESPONSE; - wqe->sqe.flags = 0; - if (irqe->num_sge) { - wqe->sqe.num_sge = 1; - wqe->sqe.sge[0].length = irqe->sge[0].length; - wqe->sqe.sge[0].laddr = irqe->sge[0].laddr; - wqe->sqe.sge[0].lkey = irqe->sge[0].lkey; - } else { - wqe->sqe.num_sge = 0; - } - - /* Retain original RREQ's message sequence number for - * potential error reporting cases. - */ - wqe->sqe.sge[1].length = irqe->sge[1].length; - - wqe->sqe.rkey = irqe->rkey; - wqe->sqe.raddr = irqe->raddr; + sqe = sq_get_next(qp); + if (!sqe) + return 0; - wqe->processed = 0; - qp->irq_get++; + memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); + wqe->wr_status = SIW_WR_QUEUED; - /* mark current IRQ entry free */ - smp_store_mb(irqe->flags, 0); + /* First copy SQE to kernel private memory */ + memcpy(&wqe->sqe, sqe, sizeof(*sqe)); + if (wqe->sqe.opcode >= SIW_NUM_OPCODES) { + rv = -EINVAL; goto out; } - sqe = sq_get_next(qp); - if (sqe) { -skip_irq: - memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); - wqe->wr_status = SIW_WR_QUEUED; - - /* First copy SQE to kernel private memory */ - memcpy(&wqe->sqe, sqe, sizeof(*sqe)); - - if (wqe->sqe.opcode >= SIW_NUM_OPCODES) { + if (wqe->sqe.flags & SIW_WQE_INLINE) { + if (wqe->sqe.opcode != SIW_OP_SEND && + wqe->sqe.opcode != SIW_OP_WRITE) { rv = -EINVAL; goto out; } - if (wqe->sqe.flags & SIW_WQE_INLINE) { - if (wqe->sqe.opcode != SIW_OP_SEND && - wqe->sqe.opcode != SIW_OP_WRITE) { - rv = -EINVAL; - goto out; - } - if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) { - rv = -EINVAL; - goto out; - } - wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1]; - wqe->sqe.sge[0].lkey = 0; - wqe->sqe.num_sge = 1; + if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) { + rv = -EINVAL; + goto out; } - if (wqe->sqe.flags & SIW_WQE_READ_FENCE) { - /* A READ cannot be fenced */ - if (unlikely(wqe->sqe.opcode == SIW_OP_READ || - wqe->sqe.opcode == - SIW_OP_READ_LOCAL_INV)) { - siw_dbg_qp(qp, "cannot fence read\n"); - rv = -EINVAL; - goto out; - } - spin_lock(&qp->orq_lock); + wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1]; + wqe->sqe.sge[0].lkey = 0; + wqe->sqe.num_sge = 1; + } + if (wqe->sqe.flags & SIW_WQE_READ_FENCE) { + /* A READ cannot be fenced */ + if (unlikely(wqe->sqe.opcode == SIW_OP_READ || + wqe->sqe.opcode == + SIW_OP_READ_LOCAL_INV)) { + siw_dbg_qp(qp, "cannot fence read\n"); + rv = -EINVAL; + goto out; + } + spin_lock(&qp->orq_lock); - if (!siw_orq_empty(qp)) { - qp->tx_ctx.orq_fence = 1; - rv = 0; - } - spin_unlock(&qp->orq_lock); + if (qp->attrs.orq_size && !siw_orq_empty(qp)) { + qp->tx_ctx.orq_fence = 1; + rv = 0; + } + spin_unlock(&qp->orq_lock); - } else if (wqe->sqe.opcode == SIW_OP_READ || - wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) { - struct siw_sqe *rreq; + } else if (wqe->sqe.opcode == SIW_OP_READ || + wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) { + struct siw_sqe *rreq; - wqe->sqe.num_sge = 1; + if (unlikely(!qp->attrs.orq_size)) { + /* We negotiated not to send READ req's */ + rv = -EINVAL; + goto out; + } + wqe->sqe.num_sge = 1; - spin_lock(&qp->orq_lock); + spin_lock(&qp->orq_lock); - rreq = orq_get_free(qp); - if (rreq) { - /* - * Make an immediate copy in ORQ to be ready - * to process loopback READ reply - */ - siw_read_to_orq(rreq, &wqe->sqe); - qp->orq_put++; - } else { - qp->tx_ctx.orq_fence = 1; - rv = 0; - } - spin_unlock(&qp->orq_lock); + rreq = orq_get_free(qp); + if (rreq) { + /* + * Make an immediate copy in ORQ to be ready + * to process loopback READ reply + */ + siw_read_to_orq(rreq, &wqe->sqe); + qp->orq_put++; + } else { + qp->tx_ctx.orq_fence = 1; + rv = 0; } - - /* Clear SQE, can be re-used by application */ - smp_store_mb(sqe->flags, 0); - qp->sq_get++; - } else { - rv = 0; + spin_unlock(&qp->orq_lock); } + + /* Clear SQE, can be re-used by application */ + smp_store_mb(sqe->flags, 0); + qp->sq_get++; out: if (unlikely(rv < 0)) { siw_dbg_qp(qp, "error %d\n", rv); @@ -1015,6 +940,65 @@ out: } /* + * Must be called with SQ locked. + * To avoid complete SQ starvation by constant inbound READ requests, + * the active IRQ will not be served after qp->irq_burst, if the + * SQ has pending work. + */ +int siw_activate_tx(struct siw_qp *qp) +{ + struct siw_sqe *irqe; + struct siw_wqe *wqe = tx_wqe(qp); + + if (!qp->attrs.irq_size) + return siw_activate_tx_from_sq(qp); + + irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size]; + + if (!(irqe->flags & SIW_WQE_VALID)) + return siw_activate_tx_from_sq(qp); + + /* + * Avoid local WQE processing starvation in case + * of constant inbound READ request stream + */ + if (sq_get_next(qp) && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) { + qp->irq_burst = 0; + return siw_activate_tx_from_sq(qp); + } + memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); + wqe->wr_status = SIW_WR_QUEUED; + + /* start READ RESPONSE */ + wqe->sqe.opcode = SIW_OP_READ_RESPONSE; + wqe->sqe.flags = 0; + if (irqe->num_sge) { + wqe->sqe.num_sge = 1; + wqe->sqe.sge[0].length = irqe->sge[0].length; + wqe->sqe.sge[0].laddr = irqe->sge[0].laddr; + wqe->sqe.sge[0].lkey = irqe->sge[0].lkey; + } else { + wqe->sqe.num_sge = 0; + } + + /* Retain original RREQ's message sequence number for + * potential error reporting cases. + */ + wqe->sqe.sge[1].length = irqe->sge[1].length; + + wqe->sqe.rkey = irqe->rkey; + wqe->sqe.raddr = irqe->raddr; + + wqe->processed = 0; + qp->irq_get++; + + /* mark current IRQ entry free */ + smp_store_mb(irqe->flags, 0); + + return 1; +} + +/* * Check if current CQ state qualifies for calling CQ completion * handler. Must be called with CQ lock held. */ @@ -1167,7 +1151,7 @@ int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes, /* * siw_sq_flush() * - * Flush SQ and ORRQ entries to CQ. + * Flush SQ and ORQ entries to CQ. * * Must be called with QP state write lock held. * Therefore, SQ and ORQ lock must not be taken. @@ -1329,8 +1313,6 @@ void siw_free_qp(struct kref *ref) vfree(qp->orq); siw_put_tx_cpu(qp->tx_cpu); - + complete(&qp->qp_free); atomic_dec(&sdev->num_qp); - siw_dbg_qp(qp, "free QP\n"); - kfree_rcu(qp, rcu); } |
