Diffstat (limited to 'drivers/infiniband/sw/siw/siw_qp_tx.c')
| -rw-r--r-- | drivers/infiniband/sw/siw/siw_qp_tx.c | 180 |
1 file changed, 102 insertions, 78 deletions
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index 4b292e0504f1..f7dd32c6e5ba 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 
 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
 /* Copyright (c) 2008-2019, IBM Corporation */
@@ -34,6 +34,15 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx)
 	return NULL;
 }
 
+static struct page *siw_get_page(struct siw_mem *mem, struct siw_sge *sge,
+				 unsigned long offset, int *pbl_idx)
+{
+	if (!mem->is_pbl)
+		return siw_get_upage(mem->umem, sge->laddr + offset);
+	else
+		return siw_get_pblpage(mem, sge->laddr + offset, pbl_idx);
+}
+
 /*
  * Copy short payload at provided destination payload address
  */
@@ -67,11 +76,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr)
 		char *buffer;
 		int pbl_idx = 0;
 
-		if (!mem->is_pbl)
-			p = siw_get_upage(mem->umem, sge->laddr);
-		else
-			p = siw_get_pblpage(mem, sge->laddr, &pbl_idx);
-
+		p = siw_get_page(mem, sge, 0, &pbl_idx);
 		if (unlikely(!p))
 			return -EFAULT;
 
@@ -85,13 +90,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr)
 			memcpy(paddr, buffer + off, part);
 			kunmap_local(buffer);
 
-			if (!mem->is_pbl)
-				p = siw_get_upage(mem->umem,
-						  sge->laddr + part);
-			else
-				p = siw_get_pblpage(mem,
-						    sge->laddr + part,
-						    &pbl_idx);
+			p = siw_get_page(mem, sge, part, &pbl_idx);
 			if (unlikely(!p))
 				return -EFAULT;
 
@@ -249,14 +248,8 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx)
 		/*
 		 * Do complete CRC if enabled and short packet
 		 */
-		if (c_tx->mpa_crc_hd) {
-			crypto_shash_init(c_tx->mpa_crc_hd);
-			if (crypto_shash_update(c_tx->mpa_crc_hd,
-						(u8 *)&c_tx->pkt,
-						c_tx->ctrl_len))
-				return -EINVAL;
-			crypto_shash_final(c_tx->mpa_crc_hd, (u8 *)crc);
-		}
+		if (c_tx->mpa_crc_enabled)
+			siw_crc_oneshot(&c_tx->pkt, c_tx->ctrl_len, (u8 *)crc);
 		c_tx->ctrl_len += MPA_CRC_SIZE;
 
 		return PKT_COMPLETE;
@@ -284,6 +277,15 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx)
 	return PKT_FRAGMENTED;
 }
 
+static noinline_for_stack int
+siw_sendmsg(struct socket *sock, unsigned int msg_flags,
+	    struct kvec *vec, size_t num, size_t len)
+{
+	struct msghdr msg = { .msg_flags = msg_flags };
+
+	return kernel_sendmsg(sock, &msg, vec, num, len);
+}
+
 /*
  * Send out one complete control type FPDU, or header of FPDU carrying
  * data. Used for fixed sized packets like Read.Requests or zero length
@@ -292,13 +294,11 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx)
 static int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s,
 		       int flags)
 {
-	struct msghdr msg = { .msg_flags = flags };
 	struct kvec iov = { .iov_base =
 				    (char *)&c_tx->pkt.ctrl + c_tx->ctrl_sent,
 			    .iov_len = c_tx->ctrl_len - c_tx->ctrl_sent };
 
-	int rv = kernel_sendmsg(s, &msg, &iov, 1,
-				c_tx->ctrl_len - c_tx->ctrl_sent);
+	int rv = siw_sendmsg(s, flags, &iov, 1, iov.iov_len);
 
 	if (rv >= 0) {
 		c_tx->ctrl_sent += rv;
@@ -312,7 +312,7 @@ static int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s,
 }
 
 /*
- * 0copy TCP transmit interface: Use do_tcp_sendpages.
+ * 0copy TCP transmit interface: Use MSG_SPLICE_PAGES.
  *
  * Using sendpage to push page by page appears to be less efficient
  * than using sendmsg, even if data are copied.
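Note on the CRC hunks above: the crypto_shash-based MPA CRC state (mpa_crc_hd) is replaced by an mpa_crc_enabled flag plus siw_crc_init/update/final/oneshot helpers, and the noinline_for_stack siw_sendmsg() wrapper keeps a struct msghdr out of its callers' stack frames. The siw_crc_* helpers live outside this file and are not shown in this diff; the sketch below is a plausible reconstruction from the call sites, assuming they wrap the kernel's crc32c library (hypothetical, not the driver's actual header):

#include <linux/crc32c.h>
#include <linux/unaligned.h>

/* CRC32c as used by MPA: seeded with ~0, bit-inverted on output. */
static inline void siw_crc_init(u32 *crc)
{
	*crc = ~0;
}

static inline void siw_crc_update(u32 *crc, const void *data, size_t len)
{
	*crc = crc32c(*crc, data, len);
}

static inline void siw_crc_final(u32 *crc, u8 out[4])
{
	put_unaligned_le32(~*crc, out);
}

/* Init, update and finalize in one step, for short packets. */
static inline void siw_crc_oneshot(const void *data, size_t len, u8 out[4])
{
	u32 crc;

	siw_crc_init(&crc);
	siw_crc_update(&crc, data, len);
	siw_crc_final(&crc, out);
}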
@@ -323,27 +323,34 @@ static int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s,
 static int siw_tcp_sendpages(struct socket *s, struct page **page, int offset,
 			     size_t size)
 {
+	struct bio_vec bvec;
+	struct msghdr msg = {
+		.msg_flags = (MSG_MORE | MSG_DONTWAIT | MSG_SPLICE_PAGES),
+	};
 	struct sock *sk = s->sk;
-	int i = 0, rv = 0, sent = 0,
-	    flags = MSG_MORE | MSG_DONTWAIT | MSG_SENDPAGE_NOTLAST;
+	int i = 0, rv = 0, sent = 0;
 
 	while (size) {
 		size_t bytes = min_t(size_t, PAGE_SIZE - offset, size);
 
 		if (size + offset <= PAGE_SIZE)
-			flags = MSG_MORE | MSG_DONTWAIT;
+			msg.msg_flags &= ~MSG_MORE;
 
 		tcp_rate_check_app_limited(sk);
+		if (!sendpage_ok(page[i]))
+			msg.msg_flags &= ~MSG_SPLICE_PAGES;
+		bvec_set_page(&bvec, page[i], bytes, offset);
+		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, bytes);
+
 try_page_again:
 		lock_sock(sk);
-		rv = do_tcp_sendpages(sk, page[i], offset, bytes, flags);
+		rv = tcp_sendmsg_locked(sk, &msg, bytes);
 		release_sock(sk);
 
 		if (rv > 0) {
 			size -= rv;
 			sent += rv;
 			if (rv != bytes) {
-				offset += rv;
 				bytes -= rv;
 				goto try_page_again;
 			}
@@ -427,13 +434,13 @@ static void siw_unmap_pages(struct kvec *iov, unsigned long kmap_mask, int len)
  * Write out iov referencing hdr, data and trailer of current FPDU.
  * Update transmit state dependent on write return status
  */
-static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
+static noinline_for_stack int siw_tx_hdt(struct siw_iwarp_tx *c_tx,
+					 struct socket *s)
 {
 	struct siw_wqe *wqe = &c_tx->wqe_active;
 	struct siw_sge *sge = &wqe->sqe.sge[c_tx->sge_idx];
 	struct kvec iov[MAX_ARRAY];
 	struct page *page_array[MAX_ARRAY];
-	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR };
 
 	int seg = 0, do_crc = c_tx->do_crc, is_kva = 0, rv;
 	unsigned int data_len = c_tx->bytes_unsent, hdr_len = 0, trl_len = 0,
@@ -480,9 +487,8 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
 				iov[seg].iov_len = sge_len;
 
 				if (do_crc)
-					crypto_shash_update(c_tx->mpa_crc_hd,
-							    iov[seg].iov_base,
-							    sge_len);
+					siw_crc_update(&c_tx->mpa_crc,
+						       iov[seg].iov_base, sge_len);
 				sge_off += sge_len;
 				data_len -= sge_len;
 				seg++;
@@ -496,13 +502,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
 			if (!is_kva) {
 				struct page *p;
 
-				if (mem->is_pbl)
-					p = siw_get_pblpage(
-						mem, sge->laddr + sge_off,
-						&pbl_idx);
-				else
-					p = siw_get_upage(mem->umem,
-							  sge->laddr + sge_off);
+				p = siw_get_page(mem, sge, sge_off, &pbl_idx);
 				if (unlikely(!p)) {
 					siw_unmap_pages(iov, kmap_mask, seg);
 					wqe->processed -= c_tx->bytes_unsent;
@@ -520,15 +520,14 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
 					iov[seg].iov_len = plen;
 
 					if (do_crc)
-						crypto_shash_update(
-							c_tx->mpa_crc_hd,
+						siw_crc_update(
+							&c_tx->mpa_crc,
 							iov[seg].iov_base,
 							plen);
 				} else if (do_crc) {
 					kaddr = kmap_local_page(p);
-					crypto_shash_update(c_tx->mpa_crc_hd,
-							    kaddr + fp_off,
-							    plen);
+					siw_crc_update(&c_tx->mpa_crc,
+						       kaddr + fp_off, plen);
 					kunmap_local(kaddr);
 				}
 			} else {
@@ -540,10 +539,9 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
 
 				page_array[seg] = ib_virt_dma_to_page(va);
 				if (do_crc)
-					crypto_shash_update(
-						c_tx->mpa_crc_hd,
-						ib_virt_dma_to_ptr(va),
-						plen);
+					siw_crc_update(&c_tx->mpa_crc,
+						       ib_virt_dma_to_ptr(va),
+						       plen);
 			}
 
 			sge_len -= plen;
@@ -580,14 +578,14 @@ sge_done:
 	if (c_tx->pad) {
 		*(u32 *)c_tx->trailer.pad = 0;
 		if (do_crc)
-			crypto_shash_update(c_tx->mpa_crc_hd,
-				(u8 *)&c_tx->trailer.crc - c_tx->pad,
-				c_tx->pad);
+			siw_crc_update(&c_tx->mpa_crc,
+				       (u8 *)&c_tx->trailer.crc - c_tx->pad,
+				       c_tx->pad);
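Note on the siw_tcp_sendpages() hunk above: do_tcp_sendpages() no longer exists; the equivalent is sendmsg with MSG_SPLICE_PAGES, which lets TCP take references on the pages instead of copying the data. sendpage_ok() rejects pages that must not be referenced that way (slab-backed memory, zero refcount); clearing MSG_SPLICE_PAGES then makes tcp_sendmsg_locked() fall back to an ordinary copying send. A stand-alone sketch of the idiom, using a hypothetical helper name (kernel context assumed):

#include <linux/bvec.h>
#include <linux/net.h>
#include <linux/uio.h>
#include <net/sock.h>
#include <net/tcp.h>

/* Hypothetical helper: splice (or copy) one page into a TCP socket. */
static int splice_one_page(struct sock *sk, struct page *p, int off,
			   size_t len, bool more)
{
	struct bio_vec bvec;
	struct msghdr msg = {
		.msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES,
	};
	int rv;

	if (more)
		msg.msg_flags |= MSG_MORE;	/* more data follows */
	if (!sendpage_ok(p))
		/* e.g. slab memory: let TCP copy instead of taking a ref */
		msg.msg_flags &= ~MSG_SPLICE_PAGES;

	bvec_set_page(&bvec, p, len, off);
	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);

	lock_sock(sk);
	rv = tcp_sendmsg_locked(sk, &msg, len);
	release_sock(sk);

	return rv;	/* bytes queued, or negative errno */
}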
 	}
-	if (!c_tx->mpa_crc_hd)
+	if (!c_tx->mpa_crc_enabled)
 		c_tx->trailer.crc = 0;
 	else if (do_crc)
-		crypto_shash_final(c_tx->mpa_crc_hd, (u8 *)&c_tx->trailer.crc);
+		siw_crc_final(&c_tx->mpa_crc, (u8 *)&c_tx->trailer.crc);
 
 	data_len = c_tx->bytes_unsent;
 
@@ -595,14 +593,16 @@ sge_done:
 		rv = siw_0copy_tx(s, page_array, &wqe->sqe.sge[c_tx->sge_idx],
 				  c_tx->sge_off, data_len);
 		if (rv == data_len) {
-			rv = kernel_sendmsg(s, &msg, &iov[seg], 1, trl_len);
+
+			rv = siw_sendmsg(s, MSG_DONTWAIT | MSG_EOR, &iov[seg],
+					 1, trl_len);
 			if (rv > 0)
 				rv += data_len;
 			else
 				rv = data_len;
 		}
 	} else {
-		rv = kernel_sendmsg(s, &msg, iov, seg + 1,
+		rv = siw_sendmsg(s, MSG_DONTWAIT | MSG_EOR, iov, seg + 1,
 				    hdr_len + data_len + trl_len);
 		siw_unmap_pages(iov, kmap_mask, seg);
 	}
@@ -740,10 +740,9 @@ static void siw_prepare_fpdu(struct siw_qp *qp, struct siw_wqe *wqe)
 	/*
 	 * Init MPA CRC computation
 	 */
-	if (c_tx->mpa_crc_hd) {
-		crypto_shash_init(c_tx->mpa_crc_hd);
-		crypto_shash_update(c_tx->mpa_crc_hd, (u8 *)&c_tx->pkt,
-				    c_tx->ctrl_len);
+	if (c_tx->mpa_crc_enabled) {
+		siw_crc_init(&c_tx->mpa_crc);
+		siw_crc_update(&c_tx->mpa_crc, &c_tx->pkt, c_tx->ctrl_len);
 		c_tx->do_crc = 1;
 	}
 }
@@ -1003,13 +1002,12 @@ static int siw_qp_sq_proc_local(struct siw_qp *qp, struct siw_wqe *wqe)
  * MPA FPDUs, each containing a DDP segment.
  *
  * SQ processing may occur in user context as a result of posting
- * new WQE's or from siw_sq_work_handler() context. Processing in
+ * new WQE's or from siw_tx_thread context. Processing in
  * user context is limited to non-kernel verbs users.
  *
  * SQ processing may get paused anytime, possibly in the middle of a WR
 * or FPDU, if insufficient send space is available. SQ processing
- * gets resumed from siw_sq_work_handler(), if send space becomes
- * available again.
+ * gets resumed from siw_tx_thread, if send space becomes available again.
 *
 * Must be called with the QP state read-locked.
 *
@@ -1202,10 +1200,45 @@ struct tx_task_t {
 
 static DEFINE_PER_CPU(struct tx_task_t, siw_tx_task_g);
 
-void siw_stop_tx_thread(int nr_cpu)
+int siw_create_tx_threads(void)
+{
+	int cpu, assigned = 0;
+
+	for_each_online_cpu(cpu) {
+		struct tx_task_t *tx_task;
+
+		/* Skip HT cores */
+		if (cpu % cpumask_weight(topology_sibling_cpumask(cpu)))
+			continue;
+
+		tx_task = &per_cpu(siw_tx_task_g, cpu);
+		init_llist_head(&tx_task->active);
+		init_waitqueue_head(&tx_task->waiting);
+
+		siw_tx_thread[cpu] =
+			kthread_run_on_cpu(siw_run_sq,
+					   (unsigned long *)(long)cpu,
+					   cpu, "siw_tx/%u");
+		if (IS_ERR(siw_tx_thread[cpu])) {
+			siw_tx_thread[cpu] = NULL;
+			continue;
+		}
+		assigned++;
+	}
+	return assigned;
+}
+
+void siw_stop_tx_threads(void)
 {
-	kthread_stop(siw_tx_thread[nr_cpu]);
-	wake_up(&per_cpu(siw_tx_task_g, nr_cpu).waiting);
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		if (siw_tx_thread[cpu]) {
+			kthread_stop(siw_tx_thread[cpu]);
+			wake_up(&per_cpu(siw_tx_task_g, cpu).waiting);
+			siw_tx_thread[cpu] = NULL;
+		}
+	}
 }
 
 int siw_run_sq(void *data)
@@ -1215,9 +1248,6 @@ int siw_run_sq(void *data)
 	struct siw_qp *qp;
 	struct tx_task_t *tx_task = &per_cpu(siw_tx_task_g, nr_cpu);
 
-	init_llist_head(&tx_task->active);
-	init_waitqueue_head(&tx_task->waiting);
-
 	while (1) {
 		struct llist_node *fifo_list = NULL;
 
@@ -1233,13 +1263,7 @@ int siw_run_sq(void *data)
 		 * llist_del_all returns a list with newest entry first.
 		 * Re-order list for fairness among QP's.
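Note on the TX thread hunks above: startup moves into siw_create_tx_threads(), which now initializes each per-CPU llist/waitqueue before the thread runs (that setup is removed from siw_run_sq() in the @@ -1215 hunk), and which skips SMT siblings: with consecutively numbered siblings, cpu % cpumask_weight(topology_sibling_cpumask(cpu)) is nonzero for all but the first CPU of each core. kthread_run_on_cpu() creates the thread, binds it to the given CPU and wakes it, substituting the CPU number for %u in the name. A minimal sketch of that pattern with a hypothetical worker (kernel context assumed):

#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/printk.h>

/* Hypothetical per-CPU worker; idles until asked to stop. */
static int demo_worker(void *arg)
{
	int cpu = (long)arg;

	while (!kthread_should_stop())
		msleep_interruptible(1000);	/* do per-CPU work here */

	pr_info("demo worker on CPU %d stopping\n", cpu);
	return 0;
}

static struct task_struct *start_demo_worker(int cpu)
{
	/* Created, bound to @cpu and woken in one call */
	return kthread_run_on_cpu(demo_worker, (void *)(long)cpu,
				  cpu, "demo/%u");
}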
 		 */
-		while (active) {
-			struct llist_node *tmp = active;
-
-			active = llist_next(active);
-			tmp->next = fifo_list;
-			fifo_list = tmp;
-		}
+		fifo_list = llist_reverse_order(active);
 		while (fifo_list) {
 			qp = container_of(fifo_list, struct siw_qp, tx_list);
 			fifo_list = llist_next(fifo_list);
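Note on the final hunk: the hand-rolled reversal loop becomes llist_reverse_order(). llist_add() pushes at the list head, so llist_del_all() hands back entries newest-first; reversing the detached list restores submission order, which is what the fairness comment asks for. A small stand-alone sketch with a hypothetical job type (kernel context assumed):

#include <linux/llist.h>
#include <linux/printk.h>

struct job {
	struct llist_node node;
	int id;
};

/* Drain a lock-free queue in FIFO (submission) order. */
static void drain_jobs(struct llist_head *q)
{
	/* llist_del_all() returns newest-first; reverse for fairness */
	struct llist_node *n = llist_reverse_order(llist_del_all(q));

	while (n) {
		struct job *j = llist_entry(n, struct job, node);

		n = llist_next(n);	/* fetch next before handling j */
		pr_info("job %d\n", j->id);
	}
}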
